// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
/* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/rhashtable.h>
#include <linux/bitops.h>
#include <linux/in6.h>
#include <linux/notifier.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/if_bridge.h>
#include <linux/socket.h>
#include <linux/route.h>
#include <linux/gcd.h>
#include <linux/if_macvlan.h>
#include <linux/refcount.h>
#include <linux/jhash.h>
#include <linux/net_namespace.h>
#include <linux/mutex.h>
#include <linux/genalloc.h>
#include <net/netevent.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/inet_dscp.h>
#include <net/ip_fib.h>
#include <net/ip6_fib.h>
#include <net/nexthop.h>
#include <net/fib_rules.h>
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>
#include <net/addrconf.h>
#include <net/ndisc.h>
#include <net/ipv6.h>
#include <net/fib_notifier.h>
#include <net/switchdev.h>

#include "spectrum.h"
#include "core.h"
#include "reg.h"
#include "spectrum_cnt.h"
#include "spectrum_dpipe.h"
#include "spectrum_ipip.h"
#include "spectrum_mr.h"
#include "spectrum_mr_tcam.h"
#include "spectrum_router.h"
#include "spectrum_span.h"

struct mlxsw_sp_fib;
struct mlxsw_sp_vr;
struct mlxsw_sp_lpm_tree;
struct mlxsw_sp_rif_ops;

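/* Router interface (RIF). One instance exists per netdev on which routing is
 * enabled in the device. The ingress / egress packet counters below are only
 * meaningful while the corresponding _valid flag is set.
 */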
struct mlxsw_sp_rif {
	struct list_head nexthop_list;
	struct list_head neigh_list;
	struct net_device *dev; /* NULL for underlay RIF */
	struct mlxsw_sp_fid *fid;
	unsigned char addr[ETH_ALEN];
	int mtu;
	u16 rif_index;
	u8 mac_profile_id;
	u8 rif_entries;
	u16 vr_id;
	const struct mlxsw_sp_rif_ops *ops;
	struct mlxsw_sp *mlxsw_sp;

	unsigned int counter_ingress;
	bool counter_ingress_valid;
	unsigned int counter_egress;
	bool counter_egress_valid;
};

static struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
{
	return rif->dev;
}

struct mlxsw_sp_rif_params {
	struct net_device *dev;
	union {
		u16 system_port;
		u16 lag_id;
	};
	u16 vid;
	bool lag;
	bool double_entry;
};

struct mlxsw_sp_rif_subport {
	struct mlxsw_sp_rif common;
	refcount_t ref_count;
	union {
		u16 system_port;
		u16 lag_id;
	};
	u16 vid;
	bool lag;
};

struct mlxsw_sp_rif_ipip_lb {
	struct mlxsw_sp_rif common;
	struct mlxsw_sp_rif_ipip_lb_config lb_config;
	u16 ul_vr_id;	/* Spectrum-1. */
	u16 ul_rif_id;	/* Spectrum-2+. */
};

struct mlxsw_sp_rif_params_ipip_lb {
	struct mlxsw_sp_rif_params common;
	struct mlxsw_sp_rif_ipip_lb_config lb_config;
};

struct mlxsw_sp_rif_ops {
	enum mlxsw_sp_rif_type type;
	size_t rif_size;

	void (*setup)(struct mlxsw_sp_rif *rif,
		      const struct mlxsw_sp_rif_params *params);
	int (*configure)(struct mlxsw_sp_rif *rif,
			 struct netlink_ext_ack *extack);
	void (*deconfigure)(struct mlxsw_sp_rif *rif);
	struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif,
					 struct netlink_ext_ack *extack);
	void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac);
};

struct mlxsw_sp_rif_mac_profile {
	unsigned char mac_prefix[ETH_ALEN];
	refcount_t ref_count;
	u8 id;
};

struct mlxsw_sp_router_ops {
	int (*init)(struct mlxsw_sp *mlxsw_sp);
	int (*ipips_init)(struct mlxsw_sp *mlxsw_sp);
};

static struct mlxsw_sp_rif *
mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
			 const struct net_device *dev);
static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);
static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_lpm_tree *lpm_tree);
static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
				     const struct mlxsw_sp_fib *fib,
				     u8 tree_id);
static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
				       const struct mlxsw_sp_fib *fib);

static unsigned int *
mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
			   enum mlxsw_sp_rif_counter_dir dir)
{
	switch (dir) {
	case MLXSW_SP_RIF_COUNTER_EGRESS:
		return &rif->counter_egress;
	case MLXSW_SP_RIF_COUNTER_INGRESS:
		return &rif->counter_ingress;
	}
	return NULL;
}

static bool
mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	switch (dir) {
	case MLXSW_SP_RIF_COUNTER_EGRESS:
		return rif->counter_egress_valid;
	case MLXSW_SP_RIF_COUNTER_INGRESS:
		return rif->counter_ingress_valid;
	}
	return false;
}

static void
mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir,
			       bool valid)
{
	switch (dir) {
	case MLXSW_SP_RIF_COUNTER_EGRESS:
		rif->counter_egress_valid = valid;
		break;
	case MLXSW_SP_RIF_COUNTER_INGRESS:
		rif->counter_ingress_valid = valid;
		break;
	}
}

static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
				     unsigned int counter_index, bool enable,
				     enum mlxsw_sp_rif_counter_dir dir)
{
	char ritr_pl[MLXSW_REG_RITR_LEN];
	bool is_egress = false;
	int err;

	if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
		is_egress = true;
	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
	if (err)
		return err;

	mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
				    is_egress);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}

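/* Read the current good-unicast packet count of the RIF counter in the given
 * direction. The RICNT query uses the NOP opcode, so the hardware counter is
 * left intact; see mlxsw_sp_rif_counter_fetch_clear() for the clearing
 * variant.
 */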
int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_rif *rif,
				   enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
{
	char ricnt_pl[MLXSW_REG_RICNT_LEN];
	unsigned int *p_counter_index;
	bool valid;
	int err;

	valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
	if (!valid)
		return -EINVAL;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (!p_counter_index)
		return -EINVAL;
	mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
			     MLXSW_REG_RICNT_OPCODE_NOP);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
	if (err)
		return err;
	*cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
	return 0;
}

struct mlxsw_sp_rif_counter_set_basic {
	u64 good_unicast_packets;
	u64 good_multicast_packets;
	u64 good_broadcast_packets;
	u64 good_unicast_bytes;
	u64 good_multicast_bytes;
	u64 good_broadcast_bytes;
	u64 error_packets;
	u64 discard_packets;
	u64 error_bytes;
	u64 discard_bytes;
};

static int
mlxsw_sp_rif_counter_fetch_clear(struct mlxsw_sp_rif *rif,
				 enum mlxsw_sp_rif_counter_dir dir,
				 struct mlxsw_sp_rif_counter_set_basic *set)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	char ricnt_pl[MLXSW_REG_RICNT_LEN];
	unsigned int *p_counter_index;
	int err;

	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
		return -EINVAL;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (!p_counter_index)
		return -EINVAL;

	mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
			     MLXSW_REG_RICNT_OPCODE_CLEAR);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
	if (err)
		return err;

	if (!set)
		return 0;

#define MLXSW_SP_RIF_COUNTER_EXTRACT(NAME)				\
		(set->NAME = mlxsw_reg_ricnt_ ## NAME ## _get(ricnt_pl))

	MLXSW_SP_RIF_COUNTER_EXTRACT(good_unicast_packets);
	MLXSW_SP_RIF_COUNTER_EXTRACT(good_multicast_packets);
	MLXSW_SP_RIF_COUNTER_EXTRACT(good_broadcast_packets);
	MLXSW_SP_RIF_COUNTER_EXTRACT(good_unicast_bytes);
	MLXSW_SP_RIF_COUNTER_EXTRACT(good_multicast_bytes);
	MLXSW_SP_RIF_COUNTER_EXTRACT(good_broadcast_bytes);
	MLXSW_SP_RIF_COUNTER_EXTRACT(error_packets);
	MLXSW_SP_RIF_COUNTER_EXTRACT(discard_packets);
	MLXSW_SP_RIF_COUNTER_EXTRACT(error_bytes);
	MLXSW_SP_RIF_COUNTER_EXTRACT(discard_bytes);

#undef MLXSW_SP_RIF_COUNTER_EXTRACT

	return 0;
}

static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
				      unsigned int counter_index)
{
	char ricnt_pl[MLXSW_REG_RICNT_LEN];

	mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
			     MLXSW_REG_RICNT_OPCODE_CLEAR);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
}

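/* Bind a counter to the RIF in the given direction. The function is
 * idempotent: an already valid counter is left in place. A newly allocated
 * counter is cleared before being enabled, so readers start from zero.
 */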
int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	unsigned int *p_counter_index;
	int err;

	if (mlxsw_sp_rif_counter_valid_get(rif, dir))
		return 0;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (!p_counter_index)
		return -EINVAL;

	err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
				     p_counter_index);
	if (err)
		return err;

	err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
	if (err)
		goto err_counter_clear;

	err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
					*p_counter_index, true, dir);
	if (err)
		goto err_counter_edit;
	mlxsw_sp_rif_counter_valid_set(rif, dir, true);
	return 0;

err_counter_edit:
err_counter_clear:
	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
			      *p_counter_index);
	return err;
}

void mlxsw_sp_rif_counter_free(struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	unsigned int *p_counter_index;

	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
		return;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (WARN_ON(!p_counter_index))
		return;
	mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
				  *p_counter_index, false, dir);
	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
			      *p_counter_index);
	mlxsw_sp_rif_counter_valid_set(rif, dir, false);
}

static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct devlink *devlink;

	devlink = priv_to_devlink(mlxsw_sp->core);
	if (!devlink_dpipe_table_counter_enabled(devlink,
						 MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
		return;
	mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
}

static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
{
	mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
}

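/* Prefix lengths 0..128 are all valid for IPv6, hence the + 1: the bitmap
 * below needs 129 bits. IPv4 prefix lengths share the same bitmap and simply
 * never exceed 32.
 */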
#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)

struct mlxsw_sp_prefix_usage {
	DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
};

#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)

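/* A minimal usage sketch for the helpers below: record that /24 and /32
 * routes exist and then walk the used prefix lengths in ascending order
 * (for_each_set_bit() iterates from bit 0 upwards):
 *
 *	struct mlxsw_sp_prefix_usage usage = {};
 *	unsigned char prefix;
 *
 *	mlxsw_sp_prefix_usage_set(&usage, 24);
 *	mlxsw_sp_prefix_usage_set(&usage, 32);
 *	mlxsw_sp_prefix_usage_for_each(prefix, &usage)
 *		pr_debug("prefix length /%u in use\n", prefix);
 */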
static bool
mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
			 struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}

static void
mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
			  struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}

static void
mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
			  unsigned char prefix_len)
{
	set_bit(prefix_len, prefix_usage->b);
}

static void
mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
			    unsigned char prefix_len)
{
	clear_bit(prefix_len, prefix_usage->b);
}

struct mlxsw_sp_fib_key {
	unsigned char addr[sizeof(struct in6_addr)];
	unsigned char prefix_len;
};

enum mlxsw_sp_fib_entry_type {
	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
	MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE,
	MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE,

	/* This is a special case of local delivery, where a packet should be
	 * decapsulated on reception. Note that there is no corresponding ENCAP,
	 * because that's a type of next hop, not of FIB entry. (There can be
	 * several next hops in a REMOTE entry, and some of them may be
	 * encapsulating entries.)
	 */
	MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
	MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP,
};

struct mlxsw_sp_nexthop_group_info;
struct mlxsw_sp_nexthop_group;
struct mlxsw_sp_fib_entry;

struct mlxsw_sp_fib_node {
	struct mlxsw_sp_fib_entry *fib_entry;
	struct list_head list;
	struct rhash_head ht_node;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_fib_key key;
};

struct mlxsw_sp_fib_entry_decap {
	struct mlxsw_sp_ipip_entry *ipip_entry;
	u32 tunnel_index;
};

struct mlxsw_sp_fib_entry {
	struct mlxsw_sp_fib_node *fib_node;
	enum mlxsw_sp_fib_entry_type type;
	struct list_head nexthop_group_node;
	struct mlxsw_sp_nexthop_group *nh_group;
	struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
};

struct mlxsw_sp_fib4_entry {
	struct mlxsw_sp_fib_entry common;
	struct fib_info *fi;
	u32 tb_id;
	dscp_t dscp;
	u8 type;
};

struct mlxsw_sp_fib6_entry {
	struct mlxsw_sp_fib_entry common;
	struct list_head rt6_list;
	unsigned int nrt6;
};

struct mlxsw_sp_rt6 {
	struct list_head list;
	struct fib6_info *rt;
};

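/* An LPM tree describes to the hardware which prefix lengths may appear in
 * the FIBs bound to it. Trees are shared: all virtual routers whose FIB has
 * the same protocol and prefix usage reference a single tree.
 */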
struct mlxsw_sp_lpm_tree {
	u8 id; /* tree ID */
	unsigned int ref_count;
	enum mlxsw_sp_l3proto proto;
	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
	struct mlxsw_sp_prefix_usage prefix_usage;
};

struct mlxsw_sp_fib {
	struct rhashtable ht;
	struct list_head node_list;
	struct mlxsw_sp_vr *vr;
	struct mlxsw_sp_lpm_tree *lpm_tree;
	enum mlxsw_sp_l3proto proto;
};

struct mlxsw_sp_vr {
	u16 id; /* virtual router ID */
	u32 tb_id; /* kernel fib table id */
	unsigned int rif_count;
	struct mlxsw_sp_fib *fib4;
	struct mlxsw_sp_fib *fib6;
	struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
	struct mlxsw_sp_rif *ul_rif;
	refcount_t ul_rif_refcnt;
};

static const struct rhashtable_params mlxsw_sp_fib_ht_params;

static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
						struct mlxsw_sp_vr *vr,
						enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	struct mlxsw_sp_fib *fib;
	int err;

	lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
	if (!fib)
		return ERR_PTR(-ENOMEM);
	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
	if (err)
		goto err_rhashtable_init;
	INIT_LIST_HEAD(&fib->node_list);
	fib->proto = proto;
	fib->vr = vr;
	fib->lpm_tree = lpm_tree;
	mlxsw_sp_lpm_tree_hold(lpm_tree);
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
	if (err)
		goto err_lpm_tree_bind;
	return fib;

err_lpm_tree_bind:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
err_rhashtable_init:
	kfree(fib);
	return ERR_PTR(err);
}

static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib *fib)
{
	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
	mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
	WARN_ON(!list_empty(&fib->node_list));
	rhashtable_destroy(&fib->ht);
	kfree(fib);
}

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int i;

	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
		if (lpm_tree->ref_count == 0)
			return lpm_tree;
	}
	return NULL;
}

static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];

	mlxsw_reg_ralta_pack(ralta_pl, true,
			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
			     lpm_tree->id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}

static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];

	mlxsw_reg_ralta_pack(ralta_pl, false,
			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
			     lpm_tree->id);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}

static int
mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_prefix_usage *prefix_usage,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralst_pl[MLXSW_REG_RALST_LEN];
	u8 root_bin = 0;
	u8 prefix;
	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;

	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
		root_bin = prefix;

	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
		if (prefix == 0)
			continue;
		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
					 MLXSW_REG_RALST_BIN_NO_CHILD);
		last_prefix = prefix;
	}
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
}

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
			 struct mlxsw_sp_prefix_usage *prefix_usage,
			 enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int err;

	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
	if (!lpm_tree)
		return ERR_PTR(-EBUSY);
	lpm_tree->proto = proto;
	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
	if (err)
		return ERR_PTR(err);

	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
						lpm_tree);
	if (err)
		goto err_left_struct_set;
	memcpy(&lpm_tree->prefix_usage, prefix_usage,
	       sizeof(lpm_tree->prefix_usage));
	memset(&lpm_tree->prefix_ref_count, 0,
	       sizeof(lpm_tree->prefix_ref_count));
	lpm_tree->ref_count = 1;
	return lpm_tree;

err_left_struct_set:
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
	return ERR_PTR(err);
}

static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_lpm_tree *lpm_tree)
{
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}

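/* Look up an in-use tree with a matching protocol and an identical prefix
 * usage and take a reference on it. Only when no such tree exists is a new
 * one carved out of the free pool.
 */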
static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
		      struct mlxsw_sp_prefix_usage *prefix_usage,
		      enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int i;

	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
		if (lpm_tree->ref_count != 0 &&
		    lpm_tree->proto == proto &&
		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
					     prefix_usage)) {
			mlxsw_sp_lpm_tree_hold(lpm_tree);
			return lpm_tree;
		}
	}
	return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
}

static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
{
	lpm_tree->ref_count++;
}

static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	if (--lpm_tree->ref_count == 0)
		mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
}

#define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */

static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
	struct mlxsw_sp_lpm_tree *lpm_tree;
	u64 max_trees;
	int err, i;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
		return -EIO;

	max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
	mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
	mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
					     sizeof(struct mlxsw_sp_lpm_tree),
					     GFP_KERNEL);
	if (!mlxsw_sp->router->lpm.trees)
		return -ENOMEM;

	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
	}

	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(lpm_tree)) {
		err = PTR_ERR(lpm_tree);
		goto err_ipv4_tree_get;
	}
	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;

	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(lpm_tree)) {
		err = PTR_ERR(lpm_tree);
		goto err_ipv6_tree_get;
	}
	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;

	return 0;

err_ipv6_tree_get:
	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
err_ipv4_tree_get:
	kfree(mlxsw_sp->router->lpm.trees);
	return err;
}

static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;

	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);

	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);

	kfree(mlxsw_sp->router->lpm.trees);
}

static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
{
	return !!vr->fib4 || !!vr->fib6 ||
	       !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] ||
	       !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
{
	int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
	struct mlxsw_sp_vr *vr;
	int i;

	for (i = 0; i < max_vrs; i++) {
		vr = &mlxsw_sp->router->vrs[i];
		if (!mlxsw_sp_vr_is_used(vr))
			return vr;
	}
	return NULL;
}

static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
				     const struct mlxsw_sp_fib *fib, u8 tree_id)
{
	char raltb_pl[MLXSW_REG_RALTB_LEN];

	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
			     tree_id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}

static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
				       const struct mlxsw_sp_fib *fib)
{
	char raltb_pl[MLXSW_REG_RALTB_LEN];

	/* Bind to tree 0 which is default */
	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
			     (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}

static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
{
	/* For our purpose, squash main, default and local tables into one */
	if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
		tb_id = RT_TABLE_MAIN;
	return tb_id;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
					    u32 tb_id)
{
	int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
	struct mlxsw_sp_vr *vr;
	int i;

	tb_id = mlxsw_sp_fix_tb_id(tb_id);

	for (i = 0; i < max_vrs; i++) {
		vr = &mlxsw_sp->router->vrs[i];
		if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
			return vr;
	}
	return NULL;
}

int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
				u16 *vr_id)
{
	struct mlxsw_sp_vr *vr;
	int err = 0;

	mutex_lock(&mlxsw_sp->router->lock);
	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
	if (!vr) {
		err = -ESRCH;
		goto out;
	}
	*vr_id = vr->id;
out:
	mutex_unlock(&mlxsw_sp->router->lock);
	return err;
}

static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
					    enum mlxsw_sp_l3proto proto)
{
	switch (proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		return vr->fib4;
	case MLXSW_SP_L3_PROTO_IPV6:
		return vr->fib6;
	}
	return NULL;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
					      u32 tb_id,
					      struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_mr_table *mr4_table, *mr6_table;
	struct mlxsw_sp_fib *fib4;
	struct mlxsw_sp_fib *fib6;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
	if (!vr) {
		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
		return ERR_PTR(-EBUSY);
	}
	fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(fib4))
		return ERR_CAST(fib4);
	fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(fib6)) {
		err = PTR_ERR(fib6);
		goto err_fib6_create;
	}
	mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
					     MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(mr4_table)) {
		err = PTR_ERR(mr4_table);
		goto err_mr4_table_create;
	}
	mr6_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
					     MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(mr6_table)) {
		err = PTR_ERR(mr6_table);
		goto err_mr6_table_create;
	}

	vr->fib4 = fib4;
	vr->fib6 = fib6;
	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = mr4_table;
	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = mr6_table;
	vr->tb_id = tb_id;
	return vr;

err_mr6_table_create:
	mlxsw_sp_mr_table_destroy(mr4_table);
err_mr4_table_create:
	mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
err_fib6_create:
	mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
	return ERR_PTR(err);
}

static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_vr *vr)
{
	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]);
	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = NULL;
	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]);
	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = NULL;
	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
	vr->fib6 = NULL;
	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
	vr->fib4 = NULL;
}

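/* Virtual routers are created lazily: the first user of a kernel table
 * allocates the VR, and mlxsw_sp_vr_put() below reclaims it once no RIF,
 * unicast route or multicast table uses it anymore.
 */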
static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
					   struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_vr *vr;

	tb_id = mlxsw_sp_fix_tb_id(tb_id);
	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
	if (!vr)
		vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
	return vr;
}

static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
{
	if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
	    list_empty(&vr->fib6->node_list) &&
	    mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]) &&
	    mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]))
		mlxsw_sp_vr_destroy(mlxsw_sp, vr);
}

static bool
mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
				    enum mlxsw_sp_l3proto proto, u8 tree_id)
{
	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);

	if (!mlxsw_sp_vr_is_used(vr))
		return false;
	if (fib->lpm_tree->id == tree_id)
		return true;
	return false;
}

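/* Rebind the FIB to a new LPM tree in a make-before-break fashion: the
 * reference on the new tree is taken first, and on failure the old tree is
 * restored, so the FIB always points at a live tree.
 */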
static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib *fib,
					struct mlxsw_sp_lpm_tree *new_tree)
{
	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
	int err;

	fib->lpm_tree = new_tree;
	mlxsw_sp_lpm_tree_hold(new_tree);
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
	if (err)
		goto err_tree_bind;
	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
	return 0;

err_tree_bind:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
	fib->lpm_tree = old_tree;
	return err;
}

static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib *fib,
					 struct mlxsw_sp_lpm_tree *new_tree)
{
	int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
	enum mlxsw_sp_l3proto proto = fib->proto;
	struct mlxsw_sp_lpm_tree *old_tree;
	u8 old_id, new_id = new_tree->id;
	struct mlxsw_sp_vr *vr;
	int i, err;

	old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
	old_id = old_tree->id;

	for (i = 0; i < max_vrs; i++) {
		vr = &mlxsw_sp->router->vrs[i];
		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
			continue;
		err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
						   mlxsw_sp_vr_fib(vr, proto),
						   new_tree);
		if (err)
			goto err_tree_replace;
	}

	memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
	       sizeof(new_tree->prefix_ref_count));
	mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);

	return 0;

err_tree_replace:
	for (i--; i >= 0; i--) {
		vr = &mlxsw_sp->router->vrs[i];
		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
			continue;
		mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
					     mlxsw_sp_vr_fib(vr, proto),
					     old_tree);
	}
	return err;
}

static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_vr *vr;
	u64 max_vrs;
	int i;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
		return -EIO;

	max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
	mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
					GFP_KERNEL);
	if (!mlxsw_sp->router->vrs)
		return -ENOMEM;

	for (i = 0; i < max_vrs; i++) {
		vr = &mlxsw_sp->router->vrs[i];
		vr->id = i;
	}

	return 0;
}

static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);

static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
{
	/* At this stage we're guaranteed not to have new incoming
	 * FIB notifications and the work queue is free from FIBs
	 * sitting on top of mlxsw netdevs. However, we can still
	 * have other FIBs queued. Flush the queue before flushing
	 * the device's tables. No need for locks, as we're the only
	 * writer.
	 */
	mlxsw_core_flush_owq();
	mlxsw_sp_router_fib_flush(mlxsw_sp);
	kfree(mlxsw_sp->router->vrs);
}

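/* Determine the underlay table of a tunnel from the device the tunnel is
 * bound to. Tunnels that are not bound to an L3 master device use the main
 * table.
 */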
u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
{
	struct net_device *d;
	u32 tb_id;

	rcu_read_lock();
	d = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
	if (d)
		tb_id = l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
	else
		tb_id = RT_TABLE_MAIN;
	rcu_read_unlock();

	return tb_id;
}

static struct mlxsw_sp_rif *
mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
		    const struct mlxsw_sp_rif_params *params,
		    struct netlink_ext_ack *extack);

static struct mlxsw_sp_rif_ipip_lb *
mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
				enum mlxsw_sp_ipip_type ipipt,
				struct net_device *ol_dev,
				struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_rif_params_ipip_lb lb_params;
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct mlxsw_sp_rif *rif;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
	lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
		.common.dev = ol_dev,
		.common.lag = false,
		.common.double_entry = ipip_ops->double_rif_entry,
		.lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
	};

	rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
	if (IS_ERR(rif))
		return ERR_CAST(rif);
	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
}

static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
			  enum mlxsw_sp_ipip_type ipipt,
			  struct net_device *ol_dev)
{
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	struct mlxsw_sp_ipip_entry *ret = NULL;
	int err;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
	ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
	if (!ipip_entry)
		return ERR_PTR(-ENOMEM);

	ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
							    ol_dev, NULL);
	if (IS_ERR(ipip_entry->ol_lb)) {
		ret = ERR_CAST(ipip_entry->ol_lb);
		goto err_ol_ipip_lb_create;
	}

	ipip_entry->ipipt = ipipt;
	ipip_entry->ol_dev = ol_dev;
	ipip_entry->parms = ipip_ops->parms_init(ol_dev);

	err = ipip_ops->rem_ip_addr_set(mlxsw_sp, ipip_entry);
	if (err) {
		ret = ERR_PTR(err);
		goto err_rem_ip_addr_set;
	}

	return ipip_entry;

err_rem_ip_addr_set:
	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
err_ol_ipip_lb_create:
	kfree(ipip_entry);
	return ret;
}

static void mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_ipip_entry *ipip_entry)
{
	const struct mlxsw_sp_ipip_ops *ipip_ops =
		mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];

	ipip_ops->rem_ip_addr_unset(mlxsw_sp, ipip_entry);
	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
	kfree(ipip_entry);
}

static bool
mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
				  const enum mlxsw_sp_l3proto ul_proto,
				  union mlxsw_sp_l3addr saddr,
				  u32 ul_tb_id,
				  struct mlxsw_sp_ipip_entry *ipip_entry)
{
	u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
	union mlxsw_sp_l3addr tun_saddr;

	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
		return false;

	tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
	return tun_ul_tb_id == ul_tb_id &&
	       mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
}

static int mlxsw_sp_ipip_decap_parsing_depth_inc(struct mlxsw_sp *mlxsw_sp,
						 enum mlxsw_sp_ipip_type ipipt)
{
	const struct mlxsw_sp_ipip_ops *ipip_ops;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];

	/* Not all tunnels require the default parsing depth (96 bytes) to be
	 * increased.
	 */
	if (ipip_ops->inc_parsing_depth)
		return mlxsw_sp_parsing_depth_inc(mlxsw_sp);

	return 0;
}

static void mlxsw_sp_ipip_decap_parsing_depth_dec(struct mlxsw_sp *mlxsw_sp,
						  enum mlxsw_sp_ipip_type ipipt)
{
	const struct mlxsw_sp_ipip_ops *ipip_ops =
		mlxsw_sp->router->ipip_ops_arr[ipipt];

	if (ipip_ops->inc_parsing_depth)
		mlxsw_sp_parsing_depth_dec(mlxsw_sp);
}

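/* Turn a FIB entry into a decap entry: allocate the adjacency index that the
 * hardware uses for the decap tunnel, make sure the headers of tunneled
 * packets are parsed deeply enough, and cross-link the FIB entry with its
 * IPIP entry.
 */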
static int
mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
			      struct mlxsw_sp_fib_entry *fib_entry,
			      struct mlxsw_sp_ipip_entry *ipip_entry)
{
	u32 tunnel_index;
	int err;

	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
				  1, &tunnel_index);
	if (err)
		return err;

	err = mlxsw_sp_ipip_decap_parsing_depth_inc(mlxsw_sp,
						    ipip_entry->ipipt);
	if (err)
		goto err_parsing_depth_inc;

	ipip_entry->decap_fib_entry = fib_entry;
	fib_entry->decap.ipip_entry = ipip_entry;
	fib_entry->decap.tunnel_index = tunnel_index;

	return 0;

err_parsing_depth_inc:
	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
			   tunnel_index);
	return err;
}

static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_fib_entry *fib_entry)
{
	enum mlxsw_sp_ipip_type ipipt = fib_entry->decap.ipip_entry->ipipt;

	/* Unlink this node from the IPIP entry that it's the decap entry of. */
	fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
	fib_entry->decap.ipip_entry = NULL;
	mlxsw_sp_ipip_decap_parsing_depth_dec(mlxsw_sp, ipipt);
	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
			   1, fib_entry->decap.tunnel_index);
}

static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
			 size_t addr_len, unsigned char prefix_len);
static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_entry *fib_entry);

static void
mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;

	mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;

	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
}

static void
mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_ipip_entry *ipip_entry,
				  struct mlxsw_sp_fib_entry *decap_fib_entry)
{
	if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
					  ipip_entry))
		return;
	decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;

	if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
}

static struct mlxsw_sp_fib_entry *
mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
				     enum mlxsw_sp_l3proto proto,
				     const union mlxsw_sp_l3addr *addr,
				     enum mlxsw_sp_fib_entry_type type)
{
	struct mlxsw_sp_fib_node *fib_node;
	unsigned char addr_prefix_len;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_vr *vr;
	const void *addrp;
	size_t addr_len;
	u32 addr4;

	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
	if (!vr)
		return NULL;
	fib = mlxsw_sp_vr_fib(vr, proto);

	switch (proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		addr4 = be32_to_cpu(addr->addr4);
		addrp = &addr4;
		addr_len = 4;
		addr_prefix_len = 32;
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		addrp = &addr->addr6;
		addr_len = 16;
		addr_prefix_len = 128;
		break;
	default:
		WARN_ON(1);
		return NULL;
	}

	fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len,
					    addr_prefix_len);
	if (!fib_node || fib_node->fib_entry->type != type)
		return NULL;

	return fib_node->fib_entry;
}

/* Given an IPIP entry, find the corresponding decap route. */
static struct mlxsw_sp_fib_entry *
mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_node *fib_node;
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	unsigned char saddr_prefix_len;
	union mlxsw_sp_l3addr saddr;
	struct mlxsw_sp_fib *ul_fib;
	struct mlxsw_sp_vr *ul_vr;
	const void *saddrp;
	size_t saddr_len;
	u32 ul_tb_id;
	u32 saddr4;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];

	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
	ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
	if (!ul_vr)
		return NULL;

	ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
	saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
					   ipip_entry->ol_dev);

	switch (ipip_ops->ul_proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		saddr4 = be32_to_cpu(saddr.addr4);
		saddrp = &saddr4;
		saddr_len = 4;
		saddr_prefix_len = 32;
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		saddrp = &saddr.addr6;
		saddr_len = 16;
		saddr_prefix_len = 128;
		break;
	default:
		WARN_ON(1);
		return NULL;
	}

	fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
					    saddr_prefix_len);
	if (!fib_node ||
	    fib_node->fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
		return NULL;

	return fib_node->fib_entry;
}

static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
			   enum mlxsw_sp_ipip_type ipipt,
			   struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
	if (IS_ERR(ipip_entry))
		return ipip_entry;

	list_add_tail(&ipip_entry->ipip_list_node,
		      &mlxsw_sp->router->ipip_list);

	return ipip_entry;
}

static void
mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_ipip_entry *ipip_entry)
{
	list_del(&ipip_entry->ipip_list_node);
	mlxsw_sp_ipip_entry_dealloc(mlxsw_sp, ipip_entry);
}

static bool
mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
				  const struct net_device *ul_dev,
				  enum mlxsw_sp_l3proto ul_proto,
				  union mlxsw_sp_l3addr ul_dip,
				  struct mlxsw_sp_ipip_entry *ipip_entry)
{
	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;

	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
		return false;

	return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
						 ul_tb_id, ipip_entry);
}

/* Given decap parameters, find the corresponding IPIP entry. */
static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp, int ul_dev_ifindex,
				  enum mlxsw_sp_l3proto ul_proto,
				  union mlxsw_sp_l3addr ul_dip)
{
	struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
	struct net_device *ul_dev;

	rcu_read_lock();

	ul_dev = dev_get_by_index_rcu(mlxsw_sp_net(mlxsw_sp), ul_dev_ifindex);
	if (!ul_dev)
		goto out_unlock;

	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
			    ipip_list_node)
		if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
						      ul_proto, ul_dip,
						      ipip_entry))
			goto out_unlock;

	rcu_read_unlock();

	return NULL;

out_unlock:
	rcu_read_unlock();
	return ipip_entry;
}

static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
				      const struct net_device *dev,
				      enum mlxsw_sp_ipip_type *p_type)
{
	struct mlxsw_sp_router *router = mlxsw_sp->router;
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	enum mlxsw_sp_ipip_type ipipt;

	for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
		ipip_ops = router->ipip_ops_arr[ipipt];
		if (dev->type == ipip_ops->dev_type) {
			if (p_type)
				*p_type = ipipt;
			return true;
		}
	}
	return false;
}

static bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
				       const struct net_device *dev)
{
	return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
}

static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
				   const struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
			    ipip_list_node)
		if (ipip_entry->ol_dev == ol_dev)
			return ipip_entry;

	return NULL;
}

static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
				   const struct net_device *ul_dev,
				   struct mlxsw_sp_ipip_entry *start)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
					ipip_list_node);
	list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
				     ipip_list_node) {
		struct net_device *ol_dev = ipip_entry->ol_dev;
		struct net_device *ipip_ul_dev;

		rcu_read_lock();
		ipip_ul_dev = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
		rcu_read_unlock();

		if (ipip_ul_dev == ul_dev)
			return ipip_entry;
	}

	return NULL;
}

static bool mlxsw_sp_netdev_is_ipip_ul(struct mlxsw_sp *mlxsw_sp,
				       const struct net_device *dev)
{
	return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
}

static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
						const struct net_device *ol_dev,
						enum mlxsw_sp_ipip_type ipipt)
{
	const struct mlxsw_sp_ipip_ops *ops
		= mlxsw_sp->router->ipip_ops_arr[ipipt];

	return ops->can_offload(mlxsw_sp, ol_dev);
}

static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev)
{
	enum mlxsw_sp_ipip_type ipipt = MLXSW_SP_IPIP_TYPE_MAX;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	enum mlxsw_sp_l3proto ul_proto;
	union mlxsw_sp_l3addr saddr;
	u32 ul_tb_id;

	mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
	if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
		ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
		ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
		saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
		if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
							  saddr, ul_tb_id,
							  NULL)) {
			ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
								ol_dev);
			if (IS_ERR(ipip_entry))
				return PTR_ERR(ipip_entry);
		}
	}

	return 0;
}

static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
						   struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (ipip_entry)
		mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
}

static void
mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_entry *decap_fib_entry;

	decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
	if (decap_fib_entry)
		mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
						  decap_fib_entry);
}

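/* Write the loopback RIF that backs an IPIP tunnel to the hardware. The RITR
 * register is packed according to the underlay protocol; ul_vr_id is used on
 * Spectrum-1 and ul_rif_id on Spectrum-2 and later (see
 * struct mlxsw_sp_rif_ipip_lb).
 */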
1563 static int
1564 mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, u16 ul_vr_id,
1565 			u16 ul_rif_id, bool enable)
1566 {
1567 	struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
1568 	struct net_device *dev = mlxsw_sp_rif_dev(&lb_rif->common);
1569 	enum mlxsw_reg_ritr_loopback_ipip_options ipip_options;
1570 	struct mlxsw_sp_rif *rif = &lb_rif->common;
1571 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
1572 	char ritr_pl[MLXSW_REG_RITR_LEN];
1573 	struct in6_addr *saddr6;
1574 	u32 saddr4;
1575 
1576 	ipip_options = MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET;
1577 	switch (lb_cf.ul_protocol) {
1578 	case MLXSW_SP_L3_PROTO_IPV4:
1579 		saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
1580 		mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
1581 				    rif->rif_index, rif->vr_id, dev->mtu);
1582 		mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
1583 						   ipip_options, ul_vr_id,
1584 						   ul_rif_id, saddr4,
1585 						   lb_cf.okey);
1586 		break;
1587 
1588 	case MLXSW_SP_L3_PROTO_IPV6:
1589 		saddr6 = &lb_cf.saddr.addr6;
1590 		mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
1591 				    rif->rif_index, rif->vr_id, dev->mtu);
1592 		mlxsw_reg_ritr_loopback_ipip6_pack(ritr_pl, lb_cf.lb_ipipt,
1593 						   ipip_options, ul_vr_id,
1594 						   ul_rif_id, saddr6,
1595 						   lb_cf.okey);
1596 		break;
1597 	}
1598 
1599 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
1600 }
1601 
1602 static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp,
1603 						 struct net_device *ol_dev)
1604 {
1605 	struct mlxsw_sp_ipip_entry *ipip_entry;
1606 	struct mlxsw_sp_rif_ipip_lb *lb_rif;
1607 	int err = 0;
1608 
1609 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1610 	if (ipip_entry) {
1611 		lb_rif = ipip_entry->ol_lb;
1612 		err = mlxsw_sp_rif_ipip_lb_op(lb_rif, lb_rif->ul_vr_id,
1613 					      lb_rif->ul_rif_id, true);
1614 		if (err)
1615 			goto out;
1616 		lb_rif->common.mtu = ol_dev->mtu;
1617 	}
1618 
1619 out:
1620 	return err;
1621 }
1622 
1623 static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
1624 						struct net_device *ol_dev)
1625 {
1626 	struct mlxsw_sp_ipip_entry *ipip_entry;
1627 
1628 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1629 	if (ipip_entry)
1630 		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1631 }
1632 
1633 static void
1634 mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1635 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1636 {
1637 	if (ipip_entry->decap_fib_entry)
1638 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1639 }
1640 
1641 static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1642 						  struct net_device *ol_dev)
1643 {
1644 	struct mlxsw_sp_ipip_entry *ipip_entry;
1645 
1646 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1647 	if (ipip_entry)
1648 		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1649 }
1650 
1651 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
1652 					 struct mlxsw_sp_rif *old_rif,
1653 					 struct mlxsw_sp_rif *new_rif);
1654 static void mlxsw_sp_rif_migrate_destroy(struct mlxsw_sp *mlxsw_sp,
1655 					 struct mlxsw_sp_rif *old_rif,
1656 					 struct mlxsw_sp_rif *new_rif,
1657 					 bool migrate_nhs)
1658 {
1659 	if (migrate_nhs)
1660 		mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, old_rif, new_rif);
1661 
1662 	mlxsw_sp_rif_destroy(old_rif);
1663 }
1664 
1665 static int
1666 mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
1667 				 struct mlxsw_sp_ipip_entry *ipip_entry,
1668 				 bool keep_encap,
1669 				 struct netlink_ext_ack *extack)
1670 {
1671 	struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
1672 	struct mlxsw_sp_rif_ipip_lb *new_lb_rif;
1673 
1674 	new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
1675 						     ipip_entry->ipipt,
1676 						     ipip_entry->ol_dev,
1677 						     extack);
1678 	if (IS_ERR(new_lb_rif))
1679 		return PTR_ERR(new_lb_rif);
1680 	ipip_entry->ol_lb = new_lb_rif;
1681 
1682 	mlxsw_sp_rif_migrate_destroy(mlxsw_sp, &old_lb_rif->common,
1683 				     &new_lb_rif->common, keep_encap);
1684 	return 0;
1685 }
1686 
1687 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
1688 					struct mlxsw_sp_rif *rif);
1689 
1690 /**
1691  * __mlxsw_sp_ipip_entry_update_tunnel - Update offload related to IPIP entry.
1692  * @mlxsw_sp: mlxsw_sp.
1693  * @ipip_entry: IPIP entry.
1694  * @recreate_loopback: Recreates the associated loopback RIF.
1695  * @keep_encap: Updates next hops that use the tunnel netdevice. This is only
1696  *              relevant when recreate_loopback is true.
1697  * @update_nexthops: Updates next hops, keeping the current loopback RIF. This
1698  *                   is only relevant when recreate_loopback is false.
1699  * @extack: extack.
1700  *
1701  * Return: Non-zero value on failure.
1702  */
1703 int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
1704 					struct mlxsw_sp_ipip_entry *ipip_entry,
1705 					bool recreate_loopback,
1706 					bool keep_encap,
1707 					bool update_nexthops,
1708 					struct netlink_ext_ack *extack)
1709 {
1710 	int err;
1711 
1712 	/* RIFs can't be edited, so to update loopback, we need to destroy and
1713 	 * recreate it. That creates a window of opportunity where RALUE and
1714 	 * RATR registers end up referencing a RIF that's already gone. RATRs
1715 	 * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
1716 	 * of RALUE, demote the decap route back.
1717 	 */
1718 	if (ipip_entry->decap_fib_entry)
1719 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1720 
1721 	if (recreate_loopback) {
1722 		err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
1723 						       keep_encap, extack);
1724 		if (err)
1725 			return err;
1726 	} else if (update_nexthops) {
1727 		mlxsw_sp_nexthop_rif_update(mlxsw_sp,
1728 					    &ipip_entry->ol_lb->common);
1729 	}
1730 
1731 	if (ipip_entry->ol_dev->flags & IFF_UP)
1732 		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1733 
1734 	return 0;
1735 }
1736 
1737 static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
1738 						struct net_device *ol_dev,
1739 						struct netlink_ext_ack *extack)
1740 {
1741 	struct mlxsw_sp_ipip_entry *ipip_entry =
1742 		mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1743 
1744 	if (!ipip_entry)
1745 		return 0;
1746 
1747 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1748 						   true, false, false, extack);
1749 }
1750 
1751 static int
1752 mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
1753 				     struct mlxsw_sp_ipip_entry *ipip_entry,
1754 				     struct net_device *ul_dev,
1755 				     bool *demote_this,
1756 				     struct netlink_ext_ack *extack)
1757 {
1758 	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1759 	enum mlxsw_sp_l3proto ul_proto;
1760 	union mlxsw_sp_l3addr saddr;
1761 
1762 	/* Moving underlay to a different VRF might cause local address
1763 	 * conflict, and the conflicting tunnels need to be demoted.
1764 	 */
1765 	ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
1766 	saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1767 	if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1768 						 saddr, ul_tb_id,
1769 						 ipip_entry)) {
1770 		*demote_this = true;
1771 		return 0;
1772 	}
1773 
1774 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1775 						   true, true, false, extack);
1776 }
1777 
1778 static int
1779 mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
1780 				    struct mlxsw_sp_ipip_entry *ipip_entry,
1781 				    struct net_device *ul_dev)
1782 {
1783 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1784 						   false, false, true, NULL);
1785 }
1786 
1787 static int
1788 mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
1789 				      struct mlxsw_sp_ipip_entry *ipip_entry,
1790 				      struct net_device *ul_dev)
1791 {
1792 	/* A down underlay device causes encapsulated packets to not be
1793 	 * forwarded, but decap still works. So refresh next hops without
1794 	 * touching anything else.
1795 	 */
1796 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1797 						   false, false, true, NULL);
1798 }
1799 
1800 static int
1801 mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
1802 					struct net_device *ol_dev,
1803 					struct netlink_ext_ack *extack)
1804 {
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct mlxsw_sp_ipip_entry *ipip_entry;
1808 
1809 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1810 	if (!ipip_entry)
1811 		/* A change might make a tunnel eligible for offloading, but
1812 		 * that is currently not implemented. What falls to slow path
1813 		 * stays there.
1814 		 */
1815 		return 0;
1816 
1817 	/* A change might make a tunnel not eligible for offloading. */
1818 	if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
1819 						 ipip_entry->ipipt)) {
1820 		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1821 		return 0;
1822 	}
1823 
	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
	return ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
1827 }
1828 
1829 void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
1830 				       struct mlxsw_sp_ipip_entry *ipip_entry)
1831 {
1832 	struct net_device *ol_dev = ipip_entry->ol_dev;
1833 
1834 	if (ol_dev->flags & IFF_UP)
1835 		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1836 	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1837 }
1838 
1839 /* The configuration where several tunnels have the same local address in the
1840  * same underlay table needs special treatment in the HW. That is currently not
1841  * implemented in the driver. This function finds and demotes the first tunnel
1842  * with a given source address, except the one passed in the argument
1843  * `except'.
1844  */
1845 bool
1846 mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
1847 				     enum mlxsw_sp_l3proto ul_proto,
1848 				     union mlxsw_sp_l3addr saddr,
1849 				     u32 ul_tb_id,
1850 				     const struct mlxsw_sp_ipip_entry *except)
1851 {
1852 	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1853 
1854 	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1855 				 ipip_list_node) {
1856 		if (ipip_entry != except &&
1857 		    mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
1858 						      ul_tb_id, ipip_entry)) {
1859 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1860 			return true;
1861 		}
1862 	}
1863 
1864 	return false;
1865 }
1866 
1867 static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
1868 						     struct net_device *ul_dev)
1869 {
1870 	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1871 
1872 	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1873 				 ipip_list_node) {
1874 		struct net_device *ol_dev = ipip_entry->ol_dev;
1875 		struct net_device *ipip_ul_dev;
1876 
1877 		rcu_read_lock();
1878 		ipip_ul_dev = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
1879 		rcu_read_unlock();
1880 		if (ipip_ul_dev == ul_dev)
1881 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1882 	}
1883 }
1884 
1885 static int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
1886 					    struct net_device *ol_dev,
1887 					    unsigned long event,
1888 					    struct netdev_notifier_info *info)
1889 {
1890 	struct netdev_notifier_changeupper_info *chup;
1891 	struct netlink_ext_ack *extack;
1892 	int err = 0;
1893 
1894 	switch (event) {
1895 	case NETDEV_REGISTER:
1896 		err = mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
1897 		break;
1898 	case NETDEV_UNREGISTER:
1899 		mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
1900 		break;
1901 	case NETDEV_UP:
1902 		mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
1903 		break;
1904 	case NETDEV_DOWN:
1905 		mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
1906 		break;
1907 	case NETDEV_CHANGEUPPER:
1908 		chup = container_of(info, typeof(*chup), info);
1909 		extack = info->extack;
1910 		if (netif_is_l3_master(chup->upper_dev))
1911 			err = mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
1912 								   ol_dev,
1913 								   extack);
1914 		break;
1915 	case NETDEV_CHANGE:
1916 		extack = info->extack;
1917 		err = mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
1918 							      ol_dev, extack);
1919 		break;
1920 	case NETDEV_CHANGEMTU:
1921 		err = mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev);
1922 		break;
1923 	}
1924 	return err;
1925 }
1926 
1927 static int
1928 __mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1929 				   struct mlxsw_sp_ipip_entry *ipip_entry,
1930 				   struct net_device *ul_dev,
1931 				   bool *demote_this,
1932 				   unsigned long event,
1933 				   struct netdev_notifier_info *info)
1934 {
1935 	struct netdev_notifier_changeupper_info *chup;
1936 	struct netlink_ext_ack *extack;
1937 
1938 	switch (event) {
1939 	case NETDEV_CHANGEUPPER:
1940 		chup = container_of(info, typeof(*chup), info);
1941 		extack = info->extack;
1942 		if (netif_is_l3_master(chup->upper_dev))
1943 			return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
1944 								    ipip_entry,
1945 								    ul_dev,
1946 								    demote_this,
1947 								    extack);
		break;
1950 	case NETDEV_UP:
1951 		return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
1952 							   ul_dev);
1953 	case NETDEV_DOWN:
1954 		return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
1955 							     ipip_entry,
1956 							     ul_dev);
1957 	}
1958 	return 0;
1959 }
1960 
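/* Walk all IPIP entries whose underlay is ul_dev. Demoting an entry unlinks
 * it from the list, so when the current entry is demoted, the walk resumes
 * from the entry that preceded it.
 */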
1961 static int
1962 mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1963 				 struct net_device *ul_dev,
1964 				 unsigned long event,
1965 				 struct netdev_notifier_info *info)
1966 {
1967 	struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
1968 	int err;
1969 
1970 	while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
1971 								ul_dev,
1972 								ipip_entry))) {
1973 		struct mlxsw_sp_ipip_entry *prev;
1974 		bool demote_this = false;
1975 
1976 		err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
1977 							 ul_dev, &demote_this,
1978 							 event, info);
1979 		if (err) {
1980 			mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
1981 								 ul_dev);
1982 			return err;
1983 		}
1984 
1985 		if (demote_this) {
1986 			if (list_is_first(&ipip_entry->ipip_list_node,
1987 					  &mlxsw_sp->router->ipip_list))
1988 				prev = NULL;
1989 			else
1990 				/* This can't be cached from previous iteration,
1991 				 * because that entry could be gone now.
1992 				 */
1993 				prev = list_prev_entry(ipip_entry,
1994 						       ipip_list_node);
1995 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1996 			ipip_entry = prev;
1997 		}
1998 	}
1999 
2000 	return 0;
2001 }
2002 
2003 int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
2004 				      enum mlxsw_sp_l3proto ul_proto,
2005 				      const union mlxsw_sp_l3addr *ul_sip,
2006 				      u32 tunnel_index)
2007 {
2008 	enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
2009 	struct mlxsw_sp_router *router = mlxsw_sp->router;
2010 	struct mlxsw_sp_fib_entry *fib_entry;
2011 	int err = 0;
2012 
2013 	mutex_lock(&mlxsw_sp->router->lock);
2014 
2015 	if (WARN_ON_ONCE(router->nve_decap_config.valid)) {
2016 		err = -EINVAL;
2017 		goto out;
2018 	}
2019 
2020 	router->nve_decap_config.ul_tb_id = ul_tb_id;
2021 	router->nve_decap_config.tunnel_index = tunnel_index;
2022 	router->nve_decap_config.ul_proto = ul_proto;
2023 	router->nve_decap_config.ul_sip = *ul_sip;
2024 	router->nve_decap_config.valid = true;
2025 
	/* It is valid to create a tunnel with a local IP and only later
	 * assign this IP address to a local interface. In that case, the
	 * decap entry is programmed once a matching route is notified.
	 */
2029 	fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
2030 							 ul_proto, ul_sip,
2031 							 type);
2032 	if (!fib_entry)
2033 		goto out;
2034 
2035 	fib_entry->decap.tunnel_index = tunnel_index;
2036 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
2037 
2038 	err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
2039 	if (err)
2040 		goto err_fib_entry_update;
2041 
2042 	goto out;
2043 
2044 err_fib_entry_update:
2045 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
2046 	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
2047 out:
2048 	mutex_unlock(&mlxsw_sp->router->lock);
2049 	return err;
2050 }
2051 
2052 void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
2053 				      enum mlxsw_sp_l3proto ul_proto,
2054 				      const union mlxsw_sp_l3addr *ul_sip)
2055 {
2056 	enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
2057 	struct mlxsw_sp_router *router = mlxsw_sp->router;
2058 	struct mlxsw_sp_fib_entry *fib_entry;
2059 
2060 	mutex_lock(&mlxsw_sp->router->lock);
2061 
2062 	if (WARN_ON_ONCE(!router->nve_decap_config.valid))
2063 		goto out;
2064 
2065 	router->nve_decap_config.valid = false;
2066 
2067 	fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
2068 							 ul_proto, ul_sip,
2069 							 type);
2070 	if (!fib_entry)
2071 		goto out;
2072 
2073 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
2074 	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
2075 out:
2076 	mutex_unlock(&mlxsw_sp->router->lock);
2077 }
2078 
2079 static bool mlxsw_sp_router_nve_is_decap(struct mlxsw_sp *mlxsw_sp,
2080 					 u32 ul_tb_id,
2081 					 enum mlxsw_sp_l3proto ul_proto,
2082 					 const union mlxsw_sp_l3addr *ul_sip)
2083 {
2084 	struct mlxsw_sp_router *router = mlxsw_sp->router;
2085 
2086 	return router->nve_decap_config.valid &&
2087 	       router->nve_decap_config.ul_tb_id == ul_tb_id &&
2088 	       router->nve_decap_config.ul_proto == ul_proto &&
2089 	       !memcmp(&router->nve_decap_config.ul_sip, ul_sip,
2090 		       sizeof(*ul_sip));
2091 }
2092 
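/* Neighbour entries are keyed by the kernel's neighbour pointer; the
 * rhashtable below therefore hashes and compares the pointer value itself.
 */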
2093 struct mlxsw_sp_neigh_key {
2094 	struct neighbour *n;
2095 };
2096 
2097 struct mlxsw_sp_neigh_entry {
2098 	struct list_head rif_list_node;
2099 	struct rhash_head ht_node;
2100 	struct mlxsw_sp_neigh_key key;
2101 	u16 rif;
2102 	bool connected;
2103 	unsigned char ha[ETH_ALEN];
2104 	struct list_head nexthop_list; /* list of nexthops using
2105 					* this neigh entry
2106 					*/
2107 	struct list_head nexthop_neighs_list_node;
2108 	unsigned int counter_index;
2109 	bool counter_valid;
2110 };
2111 
2112 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
2113 	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
2114 	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
2115 	.key_len = sizeof(struct mlxsw_sp_neigh_key),
2116 };
2117 
2118 struct mlxsw_sp_neigh_entry *
2119 mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
2120 			struct mlxsw_sp_neigh_entry *neigh_entry)
2121 {
2122 	if (!neigh_entry) {
2123 		if (list_empty(&rif->neigh_list))
2124 			return NULL;
2125 		else
2126 			return list_first_entry(&rif->neigh_list,
2127 						typeof(*neigh_entry),
2128 						rif_list_node);
2129 	}
2130 	if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
2131 		return NULL;
2132 	return list_next_entry(neigh_entry, rif_list_node);
2133 }
2134 
2135 int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
2136 {
2137 	return neigh_entry->key.n->tbl->family;
2138 }
2139 
2140 unsigned char *
2141 mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
2142 {
2143 	return neigh_entry->ha;
2144 }
2145 
2146 u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
2147 {
2148 	struct neighbour *n;
2149 
2150 	n = neigh_entry->key.n;
2151 	return ntohl(*((__be32 *) n->primary_key));
2152 }
2153 
2154 struct in6_addr *
2155 mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
2156 {
2157 	struct neighbour *n;
2158 
2159 	n = neigh_entry->key.n;
2160 	return (struct in6_addr *) &n->primary_key;
2161 }
2162 
2163 int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
2164 			       struct mlxsw_sp_neigh_entry *neigh_entry,
2165 			       u64 *p_counter)
2166 {
2167 	if (!neigh_entry->counter_valid)
2168 		return -EINVAL;
2169 
2170 	return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
2171 					 p_counter, NULL);
2172 }
2173 
2174 static struct mlxsw_sp_neigh_entry *
2175 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
2176 			   u16 rif)
2177 {
2178 	struct mlxsw_sp_neigh_entry *neigh_entry;
2179 
2180 	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
2181 	if (!neigh_entry)
2182 		return NULL;
2183 
2184 	neigh_entry->key.n = n;
2185 	neigh_entry->rif = rif;
2186 	INIT_LIST_HEAD(&neigh_entry->nexthop_list);
2187 
2188 	return neigh_entry;
2189 }
2190 
2191 static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
2192 {
2193 	kfree(neigh_entry);
2194 }
2195 
2196 static int
2197 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
2198 			    struct mlxsw_sp_neigh_entry *neigh_entry)
2199 {
2200 	return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
2201 				      &neigh_entry->ht_node,
2202 				      mlxsw_sp_neigh_ht_params);
2203 }
2204 
2205 static void
2206 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
2207 			    struct mlxsw_sp_neigh_entry *neigh_entry)
2208 {
2209 	rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
2210 			       &neigh_entry->ht_node,
2211 			       mlxsw_sp_neigh_ht_params);
2212 }
2213 
2214 static bool
2215 mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
2216 				    struct mlxsw_sp_neigh_entry *neigh_entry)
2217 {
2218 	struct devlink *devlink;
2219 	const char *table_name;
2220 
2221 	switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
2222 	case AF_INET:
2223 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
2224 		break;
2225 	case AF_INET6:
2226 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
2227 		break;
2228 	default:
2229 		WARN_ON(1);
2230 		return false;
2231 	}
2232 
2233 	devlink = priv_to_devlink(mlxsw_sp->core);
2234 	return devlink_dpipe_table_counter_enabled(devlink, table_name);
2235 }
2236 
2237 static void
2238 mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2239 			     struct mlxsw_sp_neigh_entry *neigh_entry)
2240 {
2241 	if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
2242 		return;
2243 
2244 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
2245 		return;
2246 
2247 	neigh_entry->counter_valid = true;
2248 }
2249 
2250 static void
2251 mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
2252 			    struct mlxsw_sp_neigh_entry *neigh_entry)
2253 {
2254 	if (!neigh_entry->counter_valid)
2255 		return;
2256 	mlxsw_sp_flow_counter_free(mlxsw_sp,
2257 				   neigh_entry->counter_index);
2258 	neigh_entry->counter_valid = false;
2259 }
2260 
2261 static struct mlxsw_sp_neigh_entry *
2262 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2263 {
2264 	struct mlxsw_sp_neigh_entry *neigh_entry;
2265 	struct mlxsw_sp_rif *rif;
2266 	int err;
2267 
2268 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
2269 	if (!rif)
2270 		return ERR_PTR(-EINVAL);
2271 
2272 	neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
2273 	if (!neigh_entry)
2274 		return ERR_PTR(-ENOMEM);
2275 
2276 	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
2277 	if (err)
2278 		goto err_neigh_entry_insert;
2279 
2280 	mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2281 	atomic_inc(&mlxsw_sp->router->neighs_update.neigh_count);
2282 	list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
2283 
2284 	return neigh_entry;
2285 
2286 err_neigh_entry_insert:
2287 	mlxsw_sp_neigh_entry_free(neigh_entry);
2288 	return ERR_PTR(err);
2289 }
2290 
2291 static void
2292 mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
2293 			     struct mlxsw_sp_neigh_entry *neigh_entry)
2294 {
2295 	list_del(&neigh_entry->rif_list_node);
2296 	atomic_dec(&mlxsw_sp->router->neighs_update.neigh_count);
2297 	mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2298 	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
2299 	mlxsw_sp_neigh_entry_free(neigh_entry);
2300 }
2301 
2302 static struct mlxsw_sp_neigh_entry *
2303 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2304 {
2305 	struct mlxsw_sp_neigh_key key;
2306 
2307 	key.n = n;
2308 	return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
2309 				      &key, mlxsw_sp_neigh_ht_params);
2310 }
2311 
2312 static void
2313 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
2314 {
2315 	unsigned long interval;
2316 
2317 #if IS_ENABLED(CONFIG_IPV6)
2318 	interval = min_t(unsigned long,
2319 			 NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
2320 			 NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
2321 #else
2322 	interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
2323 #endif
2324 	mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
2325 }
2326 
2327 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2328 						   char *rauhtd_pl,
2329 						   int ent_index)
2330 {
2331 	u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
2332 	struct net_device *dev;
2333 	struct neighbour *n;
2334 	__be32 dipn;
2335 	u32 dip;
2336 	u16 rif;
2337 
2338 	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
2339 
2340 	if (WARN_ON_ONCE(rif >= max_rifs))
2341 		return;
2342 	if (!mlxsw_sp->router->rifs[rif]) {
2343 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2344 		return;
2345 	}
2346 
2347 	dipn = htonl(dip);
2348 	dev = mlxsw_sp_rif_dev(mlxsw_sp->router->rifs[rif]);
2349 	n = neigh_lookup(&arp_tbl, &dipn, dev);
2350 	if (!n)
2351 		return;
2352 
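	/* The device reported this neighbour as active; poke the kernel so
	 * the entry is kept fresh instead of expiring.
	 */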
2353 	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
2354 	neigh_event_send(n, NULL);
2355 	neigh_release(n);
2356 }
2357 
2358 #if IS_ENABLED(CONFIG_IPV6)
2359 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2360 						   char *rauhtd_pl,
2361 						   int rec_index)
2362 {
2363 	struct net_device *dev;
2364 	struct neighbour *n;
2365 	struct in6_addr dip;
2366 	u16 rif;
2367 
2368 	mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
2369 					 (char *) &dip);
2370 
2371 	if (!mlxsw_sp->router->rifs[rif]) {
2372 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2373 		return;
2374 	}
2375 
2376 	dev = mlxsw_sp_rif_dev(mlxsw_sp->router->rifs[rif]);
2377 	n = neigh_lookup(&nd_tbl, &dip, dev);
2378 	if (!n)
2379 		return;
2380 
2381 	netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
2382 	neigh_event_send(n, NULL);
2383 	neigh_release(n);
2384 }
2385 #else
2386 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2387 						   char *rauhtd_pl,
2388 						   int rec_index)
2389 {
2390 }
2391 #endif
2392 
2393 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2394 						   char *rauhtd_pl,
2395 						   int rec_index)
2396 {
2397 	u8 num_entries;
2398 	int i;
2399 
2400 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2401 								rec_index);
2402 	/* Hardware starts counting at 0, so add 1. */
2403 	num_entries++;
2404 
2405 	/* Each record consists of several neighbour entries. */
2406 	for (i = 0; i < num_entries; i++) {
2407 		int ent_index;
2408 
2409 		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2410 		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
2411 						       ent_index);
2412 	}
}
2415 
2416 static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2417 						   char *rauhtd_pl,
2418 						   int rec_index)
2419 {
2420 	/* One record contains one entry. */
2421 	mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl,
2422 					       rec_index);
2423 }
2424 
2425 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
2426 					      char *rauhtd_pl, int rec_index)
2427 {
2428 	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
2429 	case MLXSW_REG_RAUHTD_TYPE_IPV4:
2430 		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
2431 						       rec_index);
2432 		break;
2433 	case MLXSW_REG_RAUHTD_TYPE_IPV6:
2434 		mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
2435 						       rec_index);
2436 		break;
2437 	}
2438 }
2439 
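/* The dump is full when the maximum number of records was returned and the
 * last record is itself full: IPv6 records always are, while an IPv4 record
 * is full only when all of its entries are in use. A full dump means
 * another query round is needed.
 */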
2440 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2441 {
2442 	u8 num_rec, last_rec_index, num_entries;
2443 
2444 	num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2445 	last_rec_index = num_rec - 1;
2446 
2447 	if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2448 		return false;
2449 	if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2450 	    MLXSW_REG_RAUHTD_TYPE_IPV6)
2451 		return true;
2452 
2453 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2454 								last_rec_index);
2455 	if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
2456 		return true;
2457 	return false;
2458 }
2459 
2460 static int
2461 __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
2462 				       char *rauhtd_pl,
2463 				       enum mlxsw_reg_rauhtd_type type)
2464 {
2465 	int i, num_rec;
2466 	int err;
2467 
2468 	/* Ensure the RIF we read from the device does not change mid-dump. */
2469 	mutex_lock(&mlxsw_sp->router->lock);
2470 	do {
2471 		mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
2472 		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
2473 				      rauhtd_pl);
2474 		if (err) {
2475 			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
2476 			break;
2477 		}
2478 		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2479 		for (i = 0; i < num_rec; i++)
2480 			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
2481 							  i);
2482 	} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
2483 	mutex_unlock(&mlxsw_sp->router->lock);
2484 
2485 	return err;
2486 }
2487 
2488 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2489 {
2490 	enum mlxsw_reg_rauhtd_type type;
2491 	char *rauhtd_pl;
2492 	int err;
2493 
2494 	if (!atomic_read(&mlxsw_sp->router->neighs_update.neigh_count))
2495 		return 0;
2496 
2497 	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2498 	if (!rauhtd_pl)
2499 		return -ENOMEM;
2500 
2501 	type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2502 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2503 	if (err)
2504 		goto out;
2505 
2506 	type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2507 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2508 out:
2509 	kfree(rauhtd_pl);
2510 	return err;
2511 }
2512 
2513 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
2514 {
2515 	struct mlxsw_sp_neigh_entry *neigh_entry;
2516 
2517 	mutex_lock(&mlxsw_sp->router->lock);
2518 	list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
2519 			    nexthop_neighs_list_node)
		/* If this neigh has nexthops, make the kernel think it is
		 * active regardless of traffic.
		 */
2523 		neigh_event_send(neigh_entry->key.n, NULL);
2524 	mutex_unlock(&mlxsw_sp->router->lock);
2525 }
2526 
2527 static void
2528 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2529 {
2530 	unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2531 
2532 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2533 			       msecs_to_jiffies(interval));
2534 }
2535 
2536 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
2537 {
2538 	struct mlxsw_sp_router *router;
2539 	int err;
2540 
2541 	router = container_of(work, struct mlxsw_sp_router,
2542 			      neighs_update.dw.work);
2543 	err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
2544 	if (err)
2545 		dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
2546 
2547 	mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
2548 
2549 	mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
2550 }
2551 
2552 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
2553 {
2554 	struct mlxsw_sp_neigh_entry *neigh_entry;
2555 	struct mlxsw_sp_router *router;
2556 
2557 	router = container_of(work, struct mlxsw_sp_router,
2558 			      nexthop_probe_dw.work);
	/* Iterate over nexthop neighbours, find those that are unresolved and
	 * send ARP for them. This solves a chicken-and-egg problem: a nexthop
	 * is not offloaded until its neighbour is resolved, but the neighbour
	 * might never be resolved if traffic already flows through the HW via
	 * a different nexthop.
	 */
2565 	mutex_lock(&router->lock);
2566 	list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
2567 			    nexthop_neighs_list_node)
2568 		if (!neigh_entry->connected)
2569 			neigh_event_send(neigh_entry->key.n, NULL);
2570 	mutex_unlock(&router->lock);
2571 
2572 	mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
2573 			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
2574 }
2575 
2576 static void
2577 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2578 			      struct mlxsw_sp_neigh_entry *neigh_entry,
2579 			      bool removing, bool dead);
2580 
2581 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2582 {
2583 	return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2584 			MLXSW_REG_RAUHT_OP_WRITE_DELETE;
2585 }
2586 
2587 static int
2588 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
2589 				struct mlxsw_sp_neigh_entry *neigh_entry,
2590 				enum mlxsw_reg_rauht_op op)
2591 {
2592 	struct neighbour *n = neigh_entry->key.n;
2593 	u32 dip = ntohl(*((__be32 *) n->primary_key));
2594 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
2595 
2596 	mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2597 			      dip);
2598 	if (neigh_entry->counter_valid)
2599 		mlxsw_reg_rauht_pack_counter(rauht_pl,
2600 					     neigh_entry->counter_index);
2601 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2602 }
2603 
2604 static int
2605 mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
2606 				struct mlxsw_sp_neigh_entry *neigh_entry,
2607 				enum mlxsw_reg_rauht_op op)
2608 {
2609 	struct neighbour *n = neigh_entry->key.n;
2610 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
2611 	const char *dip = n->primary_key;
2612 
2613 	mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2614 			      dip);
2615 	if (neigh_entry->counter_valid)
2616 		mlxsw_reg_rauht_pack_counter(rauht_pl,
2617 					     neigh_entry->counter_index);
2618 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2619 }
2620 
2621 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2622 {
2623 	struct neighbour *n = neigh_entry->key.n;
2624 
2625 	/* Packets with a link-local destination address are trapped
2626 	 * after LPM lookup and never reach the neighbour table, so
2627 	 * there is no need to program such neighbours to the device.
2628 	 */
2629 	if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2630 	    IPV6_ADDR_LINKLOCAL)
2631 		return true;
2632 	return false;
2633 }
2634 
2635 static void
2636 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
2637 			    struct mlxsw_sp_neigh_entry *neigh_entry,
2638 			    bool adding)
2639 {
2640 	enum mlxsw_reg_rauht_op op = mlxsw_sp_rauht_op(adding);
2641 	int err;
2642 
2643 	if (!adding && !neigh_entry->connected)
2644 		return;
2645 	neigh_entry->connected = adding;
2646 	if (neigh_entry->key.n->tbl->family == AF_INET) {
2647 		err = mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
2648 						      op);
2649 		if (err)
2650 			return;
2651 	} else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2652 		if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2653 			return;
2654 		err = mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2655 						      op);
2656 		if (err)
2657 			return;
2658 	} else {
2659 		WARN_ON_ONCE(1);
2660 		return;
2661 	}
2662 
2663 	if (adding)
2664 		neigh_entry->key.n->flags |= NTF_OFFLOADED;
2665 	else
2666 		neigh_entry->key.n->flags &= ~NTF_OFFLOADED;
2667 }
2668 
2669 void
2670 mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
2671 				    struct mlxsw_sp_neigh_entry *neigh_entry,
2672 				    bool adding)
2673 {
2674 	if (adding)
2675 		mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2676 	else
2677 		mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
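	/* Re-write the host entry so that the new counter binding (or its
	 * removal) takes effect in the device.
	 */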
2678 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
2679 }
2680 
2681 struct mlxsw_sp_netevent_work {
2682 	struct work_struct work;
2683 	struct mlxsw_sp *mlxsw_sp;
2684 	struct neighbour *n;
2685 };
2686 
2687 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
2688 {
2689 	struct mlxsw_sp_netevent_work *net_work =
2690 		container_of(work, struct mlxsw_sp_netevent_work, work);
2691 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2692 	struct mlxsw_sp_neigh_entry *neigh_entry;
2693 	struct neighbour *n = net_work->n;
2694 	unsigned char ha[ETH_ALEN];
2695 	bool entry_connected;
2696 	u8 nud_state, dead;
2697 
2698 	/* If these parameters are changed after we release the lock,
2699 	 * then we are guaranteed to receive another event letting us
2700 	 * know about it.
2701 	 */
2702 	read_lock_bh(&n->lock);
2703 	memcpy(ha, n->ha, ETH_ALEN);
2704 	nud_state = n->nud_state;
2705 	dead = n->dead;
2706 	read_unlock_bh(&n->lock);
2707 
2708 	mutex_lock(&mlxsw_sp->router->lock);
2709 	mlxsw_sp_span_respin(mlxsw_sp);
2710 
2711 	entry_connected = nud_state & NUD_VALID && !dead;
2712 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
2713 	if (!entry_connected && !neigh_entry)
2714 		goto out;
2715 	if (!neigh_entry) {
2716 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
2717 		if (IS_ERR(neigh_entry))
2718 			goto out;
2719 	}
2720 
2721 	if (neigh_entry->connected && entry_connected &&
2722 	    !memcmp(neigh_entry->ha, ha, ETH_ALEN))
2723 		goto out;
2724 
2725 	memcpy(neigh_entry->ha, ha, ETH_ALEN);
2726 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
2727 	mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected,
2728 				      dead);
2729 
2730 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
2731 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2732 
2733 out:
2734 	mutex_unlock(&mlxsw_sp->router->lock);
2735 	neigh_release(n);
2736 	kfree(net_work);
2737 }
2738 
2739 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
2740 
2741 static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2742 {
2743 	struct mlxsw_sp_netevent_work *net_work =
2744 		container_of(work, struct mlxsw_sp_netevent_work, work);
2745 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2746 
2747 	mlxsw_sp_mp_hash_init(mlxsw_sp);
2748 	kfree(net_work);
2749 }
2750 
2751 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
2752 
2753 static void mlxsw_sp_router_update_priority_work(struct work_struct *work)
2754 {
2755 	struct mlxsw_sp_netevent_work *net_work =
2756 		container_of(work, struct mlxsw_sp_netevent_work, work);
2757 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2758 
2759 	__mlxsw_sp_router_init(mlxsw_sp);
2760 	kfree(net_work);
2761 }
2762 
static int mlxsw_sp_router_schedule_work(struct net *net,
					 struct mlxsw_sp_router *router,
					 struct neighbour *n,
					 void (*cb)(struct work_struct *))
{
	struct mlxsw_sp_netevent_work *net_work;
	int ret = NOTIFY_DONE;

	if (!net_eq(net, mlxsw_sp_net(router->mlxsw_sp)))
		goto out;

	net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
	if (!net_work) {
		ret = NOTIFY_BAD;
		goto out;
	}

	INIT_WORK(&net_work->work, cb);
	net_work->mlxsw_sp = router->mlxsw_sp;
	net_work->n = n;
	mlxsw_core_schedule_work(&net_work->work);
	return NOTIFY_DONE;

out:
	/* The NEIGH_UPDATE caller took a reference on the neighbour for the
	 * work item to drop; release it here since no work was scheduled.
	 */
	if (n)
		neigh_release(n);
	return ret;
}
2783 
2784 static bool mlxsw_sp_dev_lower_is_port(struct net_device *dev)
2785 {
2786 	struct mlxsw_sp_port *mlxsw_sp_port;
2787 
2788 	rcu_read_lock();
2789 	mlxsw_sp_port = mlxsw_sp_port_dev_lower_find_rcu(dev);
2790 	rcu_read_unlock();
2791 	return !!mlxsw_sp_port;
2792 }
2793 
2794 static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
2795 					  unsigned long event, void *ptr)
2796 {
2797 	struct mlxsw_sp_router *router;
2798 	unsigned long interval;
2799 	struct neigh_parms *p;
2800 	struct neighbour *n;
2801 	struct net *net;
2802 
2803 	router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
2804 
2805 	switch (event) {
2806 	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
2807 		p = ptr;
2808 
2809 		/* We don't care about changes in the default table. */
2810 		if (!p->dev || (p->tbl->family != AF_INET &&
2811 				p->tbl->family != AF_INET6))
2812 			return NOTIFY_DONE;
2813 
2814 		/* We are in atomic context and can't take RTNL mutex,
2815 		 * so use RCU variant to walk the device chain.
2816 		 */
2817 		if (!mlxsw_sp_dev_lower_is_port(p->dev))
2818 			return NOTIFY_DONE;
2819 
2820 		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
2821 		router->neighs_update.interval = interval;
2822 		break;
2823 	case NETEVENT_NEIGH_UPDATE:
2824 		n = ptr;
2825 		net = neigh_parms_net(n->parms);
2826 
2827 		if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
2828 			return NOTIFY_DONE;
2829 
2830 		if (!mlxsw_sp_dev_lower_is_port(n->dev))
2831 			return NOTIFY_DONE;
2832 
2833 		/* Take a reference to ensure the neighbour won't be
2834 		 * destructed until we drop the reference in delayed
2835 		 * work.
2836 		 */
2837 		neigh_clone(n);
2838 		return mlxsw_sp_router_schedule_work(net, router, n,
2839 				mlxsw_sp_router_neigh_event_work);
2840 
2841 	case NETEVENT_IPV4_MPATH_HASH_UPDATE:
2842 	case NETEVENT_IPV6_MPATH_HASH_UPDATE:
2843 		return mlxsw_sp_router_schedule_work(ptr, router, NULL,
2844 				mlxsw_sp_router_mp_hash_event_work);
2845 
2846 	case NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE:
2847 		return mlxsw_sp_router_schedule_work(ptr, router, NULL,
2848 				mlxsw_sp_router_update_priority_work);
2849 	}
2850 
2851 	return NOTIFY_DONE;
2852 }
2853 
2854 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
2855 {
2856 	int err;
2857 
2858 	err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
2859 			      &mlxsw_sp_neigh_ht_params);
2860 	if (err)
2861 		return err;
2862 
2863 	/* Initialize the polling interval according to the default
2864 	 * table.
2865 	 */
2866 	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
2867 
	/* Create the delayed works for neighbour activity update and for
	 * probing unresolved nexthops.
	 */
2869 	INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
2870 			  mlxsw_sp_router_neighs_update_work);
2871 	INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
2872 			  mlxsw_sp_router_probe_unresolved_nexthops);
2873 	atomic_set(&mlxsw_sp->router->neighs_update.neigh_count, 0);
2874 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
2875 	mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
2876 	return 0;
2877 }
2878 
2879 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2880 {
2881 	cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
2882 	cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
2883 	rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
2884 }
2885 
2886 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2887 					 struct mlxsw_sp_rif *rif)
2888 {
2889 	struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2890 
2891 	list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2892 				 rif_list_node) {
2893 		mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2894 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2895 	}
2896 }
2897 
2898 enum mlxsw_sp_nexthop_type {
2899 	MLXSW_SP_NEXTHOP_TYPE_ETH,
2900 	MLXSW_SP_NEXTHOP_TYPE_IPIP,
2901 };
2902 
2903 enum mlxsw_sp_nexthop_action {
2904 	/* Nexthop forwards packets to an egress RIF */
2905 	MLXSW_SP_NEXTHOP_ACTION_FORWARD,
2906 	/* Nexthop discards packets */
2907 	MLXSW_SP_NEXTHOP_ACTION_DISCARD,
2908 	/* Nexthop traps packets */
2909 	MLXSW_SP_NEXTHOP_ACTION_TRAP,
2910 };
2911 
2912 struct mlxsw_sp_nexthop_key {
2913 	struct fib_nh *fib_nh;
2914 };
2915 
2916 struct mlxsw_sp_nexthop {
2917 	struct list_head neigh_list_node; /* member of neigh entry list */
2918 	struct list_head rif_list_node;
2919 	struct list_head router_list_node;
2920 	struct mlxsw_sp_nexthop_group_info *nhgi; /* pointer back to the group
2921 						   * this nexthop belongs to
2922 						   */
2923 	struct rhash_head ht_node;
2924 	struct neigh_table *neigh_tbl;
2925 	struct mlxsw_sp_nexthop_key key;
2926 	unsigned char gw_addr[sizeof(struct in6_addr)];
2927 	int ifindex;
2928 	int nh_weight;
2929 	int norm_nh_weight;
2930 	int num_adj_entries;
2931 	struct mlxsw_sp_rif *rif;
2932 	u8 should_offload:1, /* set indicates this nexthop should be written
2933 			      * to the adjacency table.
2934 			      */
2935 	   offloaded:1, /* set indicates this nexthop was written to the
2936 			 * adjacency table.
2937 			 */
2938 	   update:1; /* set indicates this nexthop should be updated in the
		      * adjacency table (e.g., its MAC changed).
2940 		      */
2941 	enum mlxsw_sp_nexthop_action action;
2942 	enum mlxsw_sp_nexthop_type type;
2943 	union {
2944 		struct mlxsw_sp_neigh_entry *neigh_entry;
2945 		struct mlxsw_sp_ipip_entry *ipip_entry;
2946 	};
2947 	unsigned int counter_index;
2948 	bool counter_valid;
2949 };
2950 
2951 static struct net_device *
2952 mlxsw_sp_nexthop_dev(const struct mlxsw_sp_nexthop *nh)
2953 {
2954 	if (nh->rif)
2955 		return mlxsw_sp_rif_dev(nh->rif);
2956 	return NULL;
2957 }
2958 
2959 enum mlxsw_sp_nexthop_group_type {
2960 	MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4,
2961 	MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6,
2962 	MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ,
2963 };
2964 
2965 struct mlxsw_sp_nexthop_group_info {
2966 	struct mlxsw_sp_nexthop_group *nh_grp;
2967 	u32 adj_index;
2968 	u16 ecmp_size;
2969 	u16 count;
2970 	int sum_norm_weight;
2971 	u8 adj_index_valid:1,
2972 	   gateway:1, /* routes using the group use a gateway */
2973 	   is_resilient:1;
2974 	struct list_head list; /* member in nh_res_grp_list */
2975 	struct mlxsw_sp_nexthop nexthops[];
2976 };
2977 
2978 static struct mlxsw_sp_rif *
2979 mlxsw_sp_nhgi_rif(const struct mlxsw_sp_nexthop_group_info *nhgi)
2980 {
2981 	return nhgi->nexthops[0].rif;
2982 }
2983 
2984 struct mlxsw_sp_nexthop_group_vr_key {
2985 	u16 vr_id;
2986 	enum mlxsw_sp_l3proto proto;
2987 };
2988 
2989 struct mlxsw_sp_nexthop_group_vr_entry {
2990 	struct list_head list; /* member in vr_list */
2991 	struct rhash_head ht_node; /* member in vr_ht */
2992 	refcount_t ref_count;
2993 	struct mlxsw_sp_nexthop_group_vr_key key;
2994 };
2995 
2996 struct mlxsw_sp_nexthop_group {
2997 	struct rhash_head ht_node;
2998 	struct list_head fib_list; /* list of fib entries that use this group */
2999 	union {
3000 		struct {
3001 			struct fib_info *fi;
3002 		} ipv4;
3003 		struct {
3004 			u32 id;
3005 		} obj;
3006 	};
3007 	struct mlxsw_sp_nexthop_group_info *nhgi;
3008 	struct list_head vr_list;
3009 	struct rhashtable vr_ht;
3010 	enum mlxsw_sp_nexthop_group_type type;
3011 	bool can_destroy;
3012 };
3013 
3014 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
3015 				    struct mlxsw_sp_nexthop *nh)
3016 {
3017 	struct devlink *devlink;
3018 
3019 	devlink = priv_to_devlink(mlxsw_sp->core);
3020 	if (!devlink_dpipe_table_counter_enabled(devlink,
3021 						 MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
3022 		return;
3023 
3024 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
3025 		return;
3026 
3027 	nh->counter_valid = true;
3028 }
3029 
3030 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
3031 				   struct mlxsw_sp_nexthop *nh)
3032 {
3033 	if (!nh->counter_valid)
3034 		return;
3035 	mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
3036 	nh->counter_valid = false;
3037 }
3038 
3039 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
3040 				 struct mlxsw_sp_nexthop *nh, u64 *p_counter)
3041 {
3042 	if (!nh->counter_valid)
3043 		return -EINVAL;
3044 
3045 	return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
3046 					 p_counter, NULL);
3047 }
3048 
3049 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
3050 					       struct mlxsw_sp_nexthop *nh)
3051 {
3052 	if (!nh) {
3053 		if (list_empty(&router->nexthop_list))
3054 			return NULL;
3055 		else
3056 			return list_first_entry(&router->nexthop_list,
3057 						typeof(*nh), router_list_node);
3058 	}
3059 	if (list_is_last(&nh->router_list_node, &router->nexthop_list))
3060 		return NULL;
3061 	return list_next_entry(nh, router_list_node);
3062 }
3063 
3064 bool mlxsw_sp_nexthop_is_forward(const struct mlxsw_sp_nexthop *nh)
3065 {
3066 	return nh->offloaded && nh->action == MLXSW_SP_NEXTHOP_ACTION_FORWARD;
3067 }
3068 
3069 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
3070 {
3071 	if (nh->type != MLXSW_SP_NEXTHOP_TYPE_ETH ||
3072 	    !mlxsw_sp_nexthop_is_forward(nh))
3073 		return NULL;
3074 	return nh->neigh_entry->ha;
3075 }
3076 
3077 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
3078 			     u32 *p_adj_size, u32 *p_adj_hash_index)
3079 {
3080 	struct mlxsw_sp_nexthop_group_info *nhgi = nh->nhgi;
3081 	u32 adj_hash_index = 0;
3082 	int i;
3083 
3084 	if (!nh->offloaded || !nhgi->adj_index_valid)
3085 		return -EINVAL;
3086 
3087 	*p_adj_index = nhgi->adj_index;
3088 	*p_adj_size = nhgi->ecmp_size;
3089 
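	/* A nexthop's adjacency entries are laid out after those of the
	 * offloaded nexthops that precede it in the group. For example,
	 * with preceding offloaded nexthops holding 2 and 3 entries, the
	 * third nexthop starts at hash index 5.
	 */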
3090 	for (i = 0; i < nhgi->count; i++) {
3091 		struct mlxsw_sp_nexthop *nh_iter = &nhgi->nexthops[i];
3092 
3093 		if (nh_iter == nh)
3094 			break;
3095 		if (nh_iter->offloaded)
3096 			adj_hash_index += nh_iter->num_adj_entries;
3097 	}
3098 
3099 	*p_adj_hash_index = adj_hash_index;
3100 	return 0;
3101 }
3102 
3103 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
3104 {
3105 	return nh->rif;
3106 }
3107 
3108 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
3109 {
3110 	struct mlxsw_sp_nexthop_group_info *nhgi = nh->nhgi;
3111 	int i;
3112 
3113 	for (i = 0; i < nhgi->count; i++) {
3114 		struct mlxsw_sp_nexthop *nh_iter = &nhgi->nexthops[i];
3115 
3116 		if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
3117 			return true;
3118 	}
3119 	return false;
3120 }
3121 
3122 static const struct rhashtable_params mlxsw_sp_nexthop_group_vr_ht_params = {
3123 	.key_offset = offsetof(struct mlxsw_sp_nexthop_group_vr_entry, key),
3124 	.head_offset = offsetof(struct mlxsw_sp_nexthop_group_vr_entry, ht_node),
3125 	.key_len = sizeof(struct mlxsw_sp_nexthop_group_vr_key),
3126 	.automatic_shrinking = true,
3127 };
3128 
3129 static struct mlxsw_sp_nexthop_group_vr_entry *
3130 mlxsw_sp_nexthop_group_vr_entry_lookup(struct mlxsw_sp_nexthop_group *nh_grp,
3131 				       const struct mlxsw_sp_fib *fib)
3132 {
3133 	struct mlxsw_sp_nexthop_group_vr_key key;
3134 
3135 	memset(&key, 0, sizeof(key));
3136 	key.vr_id = fib->vr->id;
3137 	key.proto = fib->proto;
3138 	return rhashtable_lookup_fast(&nh_grp->vr_ht, &key,
3139 				      mlxsw_sp_nexthop_group_vr_ht_params);
3140 }
3141 
3142 static int
3143 mlxsw_sp_nexthop_group_vr_entry_create(struct mlxsw_sp_nexthop_group *nh_grp,
3144 				       const struct mlxsw_sp_fib *fib)
3145 {
3146 	struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3147 	int err;
3148 
3149 	vr_entry = kzalloc(sizeof(*vr_entry), GFP_KERNEL);
3150 	if (!vr_entry)
3151 		return -ENOMEM;
3152 
3153 	vr_entry->key.vr_id = fib->vr->id;
3154 	vr_entry->key.proto = fib->proto;
3155 	refcount_set(&vr_entry->ref_count, 1);
3156 
3157 	err = rhashtable_insert_fast(&nh_grp->vr_ht, &vr_entry->ht_node,
3158 				     mlxsw_sp_nexthop_group_vr_ht_params);
3159 	if (err)
3160 		goto err_hashtable_insert;
3161 
3162 	list_add(&vr_entry->list, &nh_grp->vr_list);
3163 
3164 	return 0;
3165 
3166 err_hashtable_insert:
3167 	kfree(vr_entry);
3168 	return err;
3169 }
3170 
3171 static void
3172 mlxsw_sp_nexthop_group_vr_entry_destroy(struct mlxsw_sp_nexthop_group *nh_grp,
3173 					struct mlxsw_sp_nexthop_group_vr_entry *vr_entry)
3174 {
3175 	list_del(&vr_entry->list);
3176 	rhashtable_remove_fast(&nh_grp->vr_ht, &vr_entry->ht_node,
3177 			       mlxsw_sp_nexthop_group_vr_ht_params);
3178 	kfree(vr_entry);
3179 }
3180 
3181 static int
3182 mlxsw_sp_nexthop_group_vr_link(struct mlxsw_sp_nexthop_group *nh_grp,
3183 			       const struct mlxsw_sp_fib *fib)
3184 {
3185 	struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3186 
3187 	vr_entry = mlxsw_sp_nexthop_group_vr_entry_lookup(nh_grp, fib);
3188 	if (vr_entry) {
3189 		refcount_inc(&vr_entry->ref_count);
3190 		return 0;
3191 	}
3192 
3193 	return mlxsw_sp_nexthop_group_vr_entry_create(nh_grp, fib);
3194 }
3195 
3196 static void
3197 mlxsw_sp_nexthop_group_vr_unlink(struct mlxsw_sp_nexthop_group *nh_grp,
3198 				 const struct mlxsw_sp_fib *fib)
3199 {
3200 	struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3201 
3202 	vr_entry = mlxsw_sp_nexthop_group_vr_entry_lookup(nh_grp, fib);
3203 	if (WARN_ON_ONCE(!vr_entry))
3204 		return;
3205 
3206 	if (!refcount_dec_and_test(&vr_entry->ref_count))
3207 		return;
3208 
3209 	mlxsw_sp_nexthop_group_vr_entry_destroy(nh_grp, vr_entry);
3210 }
3211 
3212 struct mlxsw_sp_nexthop_group_cmp_arg {
3213 	enum mlxsw_sp_nexthop_group_type type;
3214 	union {
3215 		struct fib_info *fi;
3216 		struct mlxsw_sp_fib6_entry *fib6_entry;
3217 		u32 id;
3218 	};
3219 };
3220 
3221 static bool
3222 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
3223 				    const struct in6_addr *gw, int ifindex,
3224 				    int weight)
3225 {
3226 	int i;
3227 
3228 	for (i = 0; i < nh_grp->nhgi->count; i++) {
3229 		const struct mlxsw_sp_nexthop *nh;
3230 
3231 		nh = &nh_grp->nhgi->nexthops[i];
3232 		if (nh->ifindex == ifindex && nh->nh_weight == weight &&
3233 		    ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
3234 			return true;
3235 	}
3236 
3237 	return false;
3238 }
3239 
3240 static bool
3241 mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
3242 			    const struct mlxsw_sp_fib6_entry *fib6_entry)
3243 {
3244 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3245 
3246 	if (nh_grp->nhgi->count != fib6_entry->nrt6)
3247 		return false;
3248 
3249 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3250 		struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
3251 		struct in6_addr *gw;
3252 		int ifindex, weight;
3253 
3254 		ifindex = fib6_nh->fib_nh_dev->ifindex;
3255 		weight = fib6_nh->fib_nh_weight;
3256 		gw = &fib6_nh->fib_nh_gw6;
3257 		if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
3258 							 weight))
3259 			return false;
3260 	}
3261 
3262 	return true;
3263 }
3264 
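/* rhashtable compare callback: returns 0 when the lookup argument matches
 * the group, non-zero otherwise.
 */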
3265 static int
3266 mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
3267 {
3268 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
3269 	const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
3270 
3271 	if (nh_grp->type != cmp_arg->type)
3272 		return 1;
3273 
3274 	switch (cmp_arg->type) {
3275 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3276 		return cmp_arg->fi != nh_grp->ipv4.fi;
3277 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3278 		return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
3279 						    cmp_arg->fib6_entry);
3280 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3281 		return cmp_arg->id != nh_grp->obj.id;
3282 	default:
3283 		WARN_ON(1);
3284 		return 1;
3285 	}
3286 }
3287 
3288 static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
3289 {
3290 	const struct mlxsw_sp_nexthop_group *nh_grp = data;
3291 	const struct mlxsw_sp_nexthop *nh;
3292 	struct fib_info *fi;
3293 	unsigned int val;
3294 	int i;
3295 
3296 	switch (nh_grp->type) {
3297 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3298 		fi = nh_grp->ipv4.fi;
3299 		return jhash(&fi, sizeof(fi), seed);
3300 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3301 		val = nh_grp->nhgi->count;
3302 		for (i = 0; i < nh_grp->nhgi->count; i++) {
3303 			nh = &nh_grp->nhgi->nexthops[i];
3304 			val ^= jhash(&nh->ifindex, sizeof(nh->ifindex), seed);
3305 			val ^= jhash(&nh->gw_addr, sizeof(nh->gw_addr), seed);
3306 		}
3307 		return jhash(&val, sizeof(val), seed);
3308 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3309 		return jhash(&nh_grp->obj.id, sizeof(nh_grp->obj.id), seed);
3310 	default:
3311 		WARN_ON(1);
3312 		return 0;
3313 	}
3314 }
3315 
3316 static u32
3317 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
3318 {
3319 	unsigned int val = fib6_entry->nrt6;
3320 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3321 
3322 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3323 		struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
3324 		struct net_device *dev = fib6_nh->fib_nh_dev;
3325 		struct in6_addr *gw = &fib6_nh->fib_nh_gw6;
3326 
3327 		val ^= jhash(&dev->ifindex, sizeof(dev->ifindex), seed);
3328 		val ^= jhash(gw, sizeof(*gw), seed);
3329 	}
3330 
3331 	return jhash(&val, sizeof(val), seed);
3332 }
3333 
3334 static u32
3335 mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
3336 {
3337 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
3338 
3339 	switch (cmp_arg->type) {
3340 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3341 		return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
3342 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3343 		return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
3344 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3345 		return jhash(&cmp_arg->id, sizeof(cmp_arg->id), seed);
3346 	default:
3347 		WARN_ON(1);
3348 		return 0;
3349 	}
3350 }
3351 
3352 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
3353 	.head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
3354 	.hashfn	     = mlxsw_sp_nexthop_group_hash,
3355 	.obj_hashfn  = mlxsw_sp_nexthop_group_hash_obj,
3356 	.obj_cmpfn   = mlxsw_sp_nexthop_group_cmp,
3357 };
3358 
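/* IPv6 nexthop groups without a gateway back directly connected routes and
 * are seemingly never shared between entries, hence they are kept out of
 * the group hashtable.
 */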
3359 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
3360 					 struct mlxsw_sp_nexthop_group *nh_grp)
3361 {
3362 	if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6 &&
3363 	    !nh_grp->nhgi->gateway)
3364 		return 0;
3365 
3366 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
3367 				      &nh_grp->ht_node,
3368 				      mlxsw_sp_nexthop_group_ht_params);
3369 }
3370 
3371 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
3372 					  struct mlxsw_sp_nexthop_group *nh_grp)
3373 {
3374 	if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6 &&
3375 	    !nh_grp->nhgi->gateway)
3376 		return;
3377 
3378 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
3379 			       &nh_grp->ht_node,
3380 			       mlxsw_sp_nexthop_group_ht_params);
3381 }
3382 
3383 static struct mlxsw_sp_nexthop_group *
3384 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
3385 			       struct fib_info *fi)
3386 {
3387 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3388 
3389 	cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4;
3390 	cmp_arg.fi = fi;
3391 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3392 				      &cmp_arg,
3393 				      mlxsw_sp_nexthop_group_ht_params);
3394 }
3395 
3396 static struct mlxsw_sp_nexthop_group *
3397 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
3398 			       struct mlxsw_sp_fib6_entry *fib6_entry)
3399 {
3400 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3401 
3402 	cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6;
3403 	cmp_arg.fib6_entry = fib6_entry;
3404 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3405 				      &cmp_arg,
3406 				      mlxsw_sp_nexthop_group_ht_params);
3407 }
3408 
3409 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
3410 	.key_offset = offsetof(struct mlxsw_sp_nexthop, key),
3411 	.head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
3412 	.key_len = sizeof(struct mlxsw_sp_nexthop_key),
3413 };
3414 
3415 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
3416 				   struct mlxsw_sp_nexthop *nh)
3417 {
3418 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
3419 				      &nh->ht_node, mlxsw_sp_nexthop_ht_params);
3420 }
3421 
3422 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
3423 				    struct mlxsw_sp_nexthop *nh)
3424 {
3425 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
3426 			       mlxsw_sp_nexthop_ht_params);
3427 }
3428 
3429 static struct mlxsw_sp_nexthop *
3430 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
3431 			struct mlxsw_sp_nexthop_key key)
3432 {
3433 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
3434 				      mlxsw_sp_nexthop_ht_params);
3435 }
3436 
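/* RALEU re-points, within one virtual router, all routes that reference the
 * old adjacency group to the new adjacency index and ECMP size.
 */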
3437 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
3438 					     enum mlxsw_sp_l3proto proto,
3439 					     u16 vr_id,
3440 					     u32 adj_index, u16 ecmp_size,
3441 					     u32 new_adj_index,
3442 					     u16 new_ecmp_size)
3443 {
3444 	char raleu_pl[MLXSW_REG_RALEU_LEN];
3445 
3446 	mlxsw_reg_raleu_pack(raleu_pl,
3447 			     (enum mlxsw_reg_ralxx_protocol) proto, vr_id,
3448 			     adj_index, ecmp_size, new_adj_index,
3449 			     new_ecmp_size);
3450 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
3451 }
3452 
3453 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
3454 					  struct mlxsw_sp_nexthop_group *nh_grp,
3455 					  u32 old_adj_index, u16 old_ecmp_size)
3456 {
3457 	struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
3458 	struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3459 	int err;
3460 
3461 	list_for_each_entry(vr_entry, &nh_grp->vr_list, list) {
3462 		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp,
3463 							vr_entry->key.proto,
3464 							vr_entry->key.vr_id,
3465 							old_adj_index,
3466 							old_ecmp_size,
3467 							nhgi->adj_index,
3468 							nhgi->ecmp_size);
3469 		if (err)
3470 			goto err_mass_update_vr;
3471 	}
3472 	return 0;
3473 
3474 err_mass_update_vr:
3475 	list_for_each_entry_continue_reverse(vr_entry, &nh_grp->vr_list, list)
3476 		mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr_entry->key.proto,
3477 						  vr_entry->key.vr_id,
3478 						  nhgi->adj_index,
3479 						  nhgi->ecmp_size,
3480 						  old_adj_index, old_ecmp_size);
3481 	return err;
3482 }
3483 
3484 static int __mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp,
3485 					 u32 adj_index,
3486 					 struct mlxsw_sp_nexthop *nh,
3487 					 bool force, char *ratr_pl)
3488 {
3489 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3490 	enum mlxsw_reg_ratr_op op;
3491 	u16 rif_index;
3492 
3493 	rif_index = nh->rif ? nh->rif->rif_index :
3494 			      mlxsw_sp->router->lb_rif_index;
3495 	op = force ? MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY :
3496 		     MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY;
3497 	mlxsw_reg_ratr_pack(ratr_pl, op, true, MLXSW_REG_RATR_TYPE_ETHERNET,
3498 			    adj_index, rif_index);
3499 	switch (nh->action) {
3500 	case MLXSW_SP_NEXTHOP_ACTION_FORWARD:
3501 		mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
3502 		break;
3503 	case MLXSW_SP_NEXTHOP_ACTION_DISCARD:
3504 		mlxsw_reg_ratr_trap_action_set(ratr_pl,
3505 					       MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS);
3506 		break;
3507 	case MLXSW_SP_NEXTHOP_ACTION_TRAP:
3508 		mlxsw_reg_ratr_trap_action_set(ratr_pl,
3509 					       MLXSW_REG_RATR_TRAP_ACTION_TRAP);
3510 		mlxsw_reg_ratr_trap_id_set(ratr_pl, MLXSW_TRAP_ID_RTR_EGRESS0);
3511 		break;
3512 	default:
3513 		WARN_ON_ONCE(1);
3514 		return -EINVAL;
3515 	}
3516 	if (nh->counter_valid)
3517 		mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
3518 	else
3519 		mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
3520 
3521 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
3522 }
3523 
3524 int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3525 				struct mlxsw_sp_nexthop *nh, bool force,
3526 				char *ratr_pl)
3527 {
3528 	int i;
3529 
3530 	for (i = 0; i < nh->num_adj_entries; i++) {
3531 		int err;
3532 
3533 		err = __mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index + i,
3534 						    nh, force, ratr_pl);
3535 		if (err)
3536 			return err;
3537 	}
3538 
3539 	return 0;
3540 }
3541 
3542 static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3543 					  u32 adj_index,
3544 					  struct mlxsw_sp_nexthop *nh,
3545 					  bool force, char *ratr_pl)
3546 {
3547 	const struct mlxsw_sp_ipip_ops *ipip_ops;
3548 
3549 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
3550 	return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry,
3551 					force, ratr_pl);
3552 }
3553 
3554 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3555 					u32 adj_index,
3556 					struct mlxsw_sp_nexthop *nh, bool force,
3557 					char *ratr_pl)
3558 {
3559 	int i;
3560 
3561 	for (i = 0; i < nh->num_adj_entries; i++) {
3562 		int err;
3563 
3564 		err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
3565 						     nh, force, ratr_pl);
3566 		if (err)
3567 			return err;
3568 	}
3569 
3570 	return 0;
3571 }
3572 
3573 static int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3574 				   struct mlxsw_sp_nexthop *nh, bool force,
3575 				   char *ratr_pl)
3576 {
3577 	/* When action is discard or trap, the nexthop must be
3578 	 * programmed as an Ethernet nexthop.
3579 	 */
3580 	if (nh->type == MLXSW_SP_NEXTHOP_TYPE_ETH ||
3581 	    nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD ||
3582 	    nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP)
3583 		return mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index, nh,
3584 						   force, ratr_pl);
3585 	else
3586 		return mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index, nh,
3587 						    force, ratr_pl);
3588 }
3589 
3590 static int
3591 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
3592 			      struct mlxsw_sp_nexthop_group_info *nhgi,
3593 			      bool reallocate)
3594 {
3595 	char ratr_pl[MLXSW_REG_RATR_LEN];
3596 	u32 adj_index = nhgi->adj_index; /* base */
3597 	struct mlxsw_sp_nexthop *nh;
3598 	int i;
3599 
3600 	for (i = 0; i < nhgi->count; i++) {
3601 		nh = &nhgi->nexthops[i];
3602 
3603 		if (!nh->should_offload) {
3604 			nh->offloaded = 0;
3605 			continue;
3606 		}
3607 
3608 		if (nh->update || reallocate) {
3609 			int err;
3610 
3611 			err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh,
3612 						      true, ratr_pl);
3613 			if (err)
3614 				return err;
3615 			nh->update = 0;
3616 			nh->offloaded = 1;
3617 		}
3618 		adj_index += nh->num_adj_entries;
3619 	}
3620 	return 0;
3621 }
3622 
3623 static int
3624 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
3625 				    struct mlxsw_sp_nexthop_group *nh_grp)
3626 {
3627 	struct mlxsw_sp_fib_entry *fib_entry;
3628 	int err;
3629 
3630 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3631 		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
3632 		if (err)
3633 			return err;
3634 	}
3635 	return 0;
3636 }
3637 
3638 struct mlxsw_sp_adj_grp_size_range {
3639 	u16 start; /* Inclusive */
3640 	u16 end; /* Inclusive */
3641 };
3642 
3643 /* Ordered by range start value */
3644 static const struct mlxsw_sp_adj_grp_size_range
3645 mlxsw_sp1_adj_grp_size_ranges[] = {
3646 	{ .start = 1, .end = 64 },
3647 	{ .start = 512, .end = 512 },
3648 	{ .start = 1024, .end = 1024 },
3649 	{ .start = 2048, .end = 2048 },
3650 	{ .start = 4096, .end = 4096 },
3651 };
3652 
3653 /* Ordered by range start value */
3654 static const struct mlxsw_sp_adj_grp_size_range
3655 mlxsw_sp2_adj_grp_size_ranges[] = {
3656 	{ .start = 1, .end = 128 },
3657 	{ .start = 256, .end = 256 },
3658 	{ .start = 512, .end = 512 },
3659 	{ .start = 1024, .end = 1024 },
3660 	{ .start = 2048, .end = 2048 },
3661 	{ .start = 4096, .end = 4096 },
3662 };
3663 
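/* Round the requested group size up to the next size the device can
 * allocate, according to the range tables above. For example
 * (illustrative values), a request for 200 entries rounds up to 512 on
 * Spectrum-1 and to 256 on Spectrum-2 and later; a request that
 * already falls inside a range is left unchanged.
 */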
3664 static void mlxsw_sp_adj_grp_size_round_up(const struct mlxsw_sp *mlxsw_sp,
3665 					   u16 *p_adj_grp_size)
3666 {
3667 	int i;
3668 
3669 	for (i = 0; i < mlxsw_sp->router->adj_grp_size_ranges_count; i++) {
3670 		const struct mlxsw_sp_adj_grp_size_range *size_range;
3671 
3672 		size_range = &mlxsw_sp->router->adj_grp_size_ranges[i];
3673 
3674 		if (*p_adj_grp_size >= size_range->start &&
3675 		    *p_adj_grp_size <= size_range->end)
3676 			return;
3677 
3678 		if (*p_adj_grp_size <= size_range->end) {
3679 			*p_adj_grp_size = size_range->end;
3680 			return;
3681 		}
3682 	}
3683 }
3684 
3685 static void mlxsw_sp_adj_grp_size_round_down(const struct mlxsw_sp *mlxsw_sp,
3686 					     u16 *p_adj_grp_size,
3687 					     unsigned int alloc_size)
3688 {
3689 	int i;
3690 
3691 	for (i = mlxsw_sp->router->adj_grp_size_ranges_count - 1; i >= 0; i--) {
3692 		const struct mlxsw_sp_adj_grp_size_range *size_range;
3693 
3694 		size_range = &mlxsw_sp->router->adj_grp_size_ranges[i];
3695 
3696 		if (alloc_size >= size_range->end) {
3697 			*p_adj_grp_size = size_range->end;
3698 			return;
3699 		}
3700 	}
3701 }
3702 
3703 static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
3704 				     u16 *p_adj_grp_size)
3705 {
3706 	unsigned int alloc_size;
3707 	int err;
3708 
3709 	/* Round up the requested group size to the next size supported
3710 	 * by the device and make sure the request can be satisfied.
3711 	 */
3712 	mlxsw_sp_adj_grp_size_round_up(mlxsw_sp, p_adj_grp_size);
3713 	err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
3714 					      MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3715 					      *p_adj_grp_size, &alloc_size);
3716 	if (err)
3717 		return err;
3718 	/* It is possible the allocation results in more allocated
3719 	 * entries than requested. Try to use as many of them as
3720 	 * possible.
3721 	 */
3722 	mlxsw_sp_adj_grp_size_round_down(mlxsw_sp, p_adj_grp_size, alloc_size);
3723 
3724 	return 0;
3725 }
3726 
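/* Reduce the nexthop weights by the GCD of all offloadable weights, so
 * that the smallest equivalent ratio is used. For example
 * (illustrative values), weights { 6, 9, 3 } have a GCD of 3 and
 * normalize to { 2, 3, 1 }, giving a sum_norm_weight of 6.
 */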
3727 static void
3728 mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group_info *nhgi)
3729 {
3730 	int i, g = 0, sum_norm_weight = 0;
3731 	struct mlxsw_sp_nexthop *nh;
3732 
3733 	for (i = 0; i < nhgi->count; i++) {
3734 		nh = &nhgi->nexthops[i];
3735 
3736 		if (!nh->should_offload)
3737 			continue;
3738 		if (g > 0)
3739 			g = gcd(nh->nh_weight, g);
3740 		else
3741 			g = nh->nh_weight;
3742 	}
3743 
3744 	for (i = 0; i < nhgi->count; i++) {
3745 		nh = &nhgi->nexthops[i];
3746 
3747 		if (!nh->should_offload)
3748 			continue;
3749 		nh->norm_nh_weight = nh->nh_weight / g;
3750 		sum_norm_weight += nh->norm_nh_weight;
3751 	}
3752 
3753 	nhgi->sum_norm_weight = sum_norm_weight;
3754 }
3755 
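/* Distribute the group's ecmp_size adjacency entries between the
 * offloaded nexthops in proportion to their normalized weights. The
 * running rounded upper bound guarantees the per-nexthop counts sum to
 * exactly ecmp_size. For example (illustrative values), normalized
 * weights { 1, 2 } and an ecmp_size of 512 yield 171 and 341 entries,
 * respectively.
 */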
3756 static void
3757 mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group_info *nhgi)
3758 {
3759 	int i, weight = 0, lower_bound = 0;
3760 	int total = nhgi->sum_norm_weight;
3761 	u16 ecmp_size = nhgi->ecmp_size;
3762 
3763 	for (i = 0; i < nhgi->count; i++) {
3764 		struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
3765 		int upper_bound;
3766 
3767 		if (!nh->should_offload)
3768 			continue;
3769 		weight += nh->norm_nh_weight;
3770 		upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
3771 		nh->num_adj_entries = upper_bound - lower_bound;
3772 		lower_bound = upper_bound;
3773 	}
3774 }
3775 
3776 static struct mlxsw_sp_nexthop *
3777 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
3778 		     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6);
3779 
3780 static void
3781 mlxsw_sp_nexthop4_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3782 					struct mlxsw_sp_nexthop_group *nh_grp)
3783 {
3784 	int i;
3785 
3786 	for (i = 0; i < nh_grp->nhgi->count; i++) {
3787 		struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i];
3788 
3789 		if (nh->offloaded)
3790 			nh->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
3791 		else
3792 			nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
3793 	}
3794 }
3795 
3796 static void
3797 __mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp_nexthop_group *nh_grp,
3798 					  struct mlxsw_sp_fib6_entry *fib6_entry)
3799 {
3800 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3801 
3802 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3803 		struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
3804 		struct mlxsw_sp_nexthop *nh;
3805 
3806 		nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
3807 		if (nh && nh->offloaded)
3808 			fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
3809 		else
3810 			fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
3811 	}
3812 }
3813 
3814 static void
3815 mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3816 					struct mlxsw_sp_nexthop_group *nh_grp)
3817 {
3818 	struct mlxsw_sp_fib6_entry *fib6_entry;
3819 
3820 	/* Unfortunately, in IPv6 the route and the nexthop are described by
3821 	 * the same struct, so we need to iterate over all the routes using the
3822 	 * nexthop group and set / clear the offload indication for them.
3823 	 */
3824 	list_for_each_entry(fib6_entry, &nh_grp->fib_list,
3825 			    common.nexthop_group_node)
3826 		__mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry);
3827 }
3828 
3829 static void
3830 mlxsw_sp_nexthop_bucket_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3831 					const struct mlxsw_sp_nexthop *nh,
3832 					u16 bucket_index)
3833 {
3834 	struct mlxsw_sp_nexthop_group *nh_grp = nh->nhgi->nh_grp;
3835 	bool offload = false, trap = false;
3836 
3837 	if (nh->offloaded) {
3838 		if (nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP)
3839 			trap = true;
3840 		else
3841 			offload = true;
3842 	}
3843 	nexthop_bucket_set_hw_flags(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id,
3844 				    bucket_index, offload, trap);
3845 }
3846 
3847 static void
3848 mlxsw_sp_nexthop_obj_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3849 					   struct mlxsw_sp_nexthop_group *nh_grp)
3850 {
3851 	int i;
3852 
3853 	/* Do not update the flags if the nexthop group is being destroyed
3854 	 * since:
3855 	 * 1. The nexthop object is being deleted, in which case the flags are
3856 	 * irrelevant.
3857 	 * 2. The nexthop group was replaced by a newer group, in which case
3858 	 * the flags of the nexthop object were already updated based on the
3859 	 * new group.
3860 	 */
3861 	if (nh_grp->can_destroy)
3862 		return;
3863 
3864 	nexthop_set_hw_flags(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id,
3865 			     nh_grp->nhgi->adj_index_valid, false);
3866 
3867 	/* Update flags of individual nexthop buckets in case of a resilient
3868 	 * nexthop group.
3869 	 */
3870 	if (!nh_grp->nhgi->is_resilient)
3871 		return;
3872 
3873 	for (i = 0; i < nh_grp->nhgi->count; i++) {
3874 		struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i];
3875 
3876 		mlxsw_sp_nexthop_bucket_offload_refresh(mlxsw_sp, nh, i);
3877 	}
3878 }
3879 
3880 static void
3881 mlxsw_sp_nexthop_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3882 				       struct mlxsw_sp_nexthop_group *nh_grp)
3883 {
3884 	switch (nh_grp->type) {
3885 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3886 		mlxsw_sp_nexthop4_group_offload_refresh(mlxsw_sp, nh_grp);
3887 		break;
3888 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3889 		mlxsw_sp_nexthop6_group_offload_refresh(mlxsw_sp, nh_grp);
3890 		break;
3891 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3892 		mlxsw_sp_nexthop_obj_group_offload_refresh(mlxsw_sp, nh_grp);
3893 		break;
3894 	}
3895 }
3896 
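/* Re-sync a nexthop group with the device. Groups without a gateway
 * only need their FIB entries refreshed. For gateway groups:
 * 1. If no nexthop gained or lost offloadability, only rewrite the
 *    existing adjacency entries.
 * 2. Otherwise, normalize the weights, round the result to a supported
 *    allocation size, allocate a new KVD linear range, write the
 *    nexthops to it and re-point all users at the new index.
 * 3. On any failure, fall back to trapping the group's traffic to the
 *    CPU (the set_trap label).
 */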
3897 static int
3898 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
3899 			       struct mlxsw_sp_nexthop_group *nh_grp)
3900 {
3901 	struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
3902 	u16 ecmp_size, old_ecmp_size;
3903 	struct mlxsw_sp_nexthop *nh;
3904 	bool offload_change = false;
3905 	u32 adj_index;
3906 	bool old_adj_index_valid;
3907 	u32 old_adj_index;
3908 	int i, err2, err;
3909 
3910 	if (!nhgi->gateway)
3911 		return mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3912 
3913 	for (i = 0; i < nhgi->count; i++) {
3914 		nh = &nhgi->nexthops[i];
3915 
3916 		if (nh->should_offload != nh->offloaded) {
3917 			offload_change = true;
3918 			if (nh->should_offload)
3919 				nh->update = 1;
3920 		}
3921 	}
3922 	if (!offload_change) {
3923 		/* Nothing was added or removed, so no need to reallocate. Just
3924 		 * update the MACs on the existing adjacency entries.
3925 		 */
3926 		err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nhgi, false);
3927 		if (err) {
3928 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3929 			goto set_trap;
3930 		}
3931 		/* Flags of individual nexthop buckets might need to be
3932 		 * updated.
3933 		 */
3934 		mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
3935 		return 0;
3936 	}
3937 	mlxsw_sp_nexthop_group_normalize(nhgi);
3938 	if (!nhgi->sum_norm_weight) {
3939 		/* No neigh of this group is connected so we just set
3940 		 * the trap and let everything flow through the kernel.
3941 		 */
3942 		err = 0;
3943 		goto set_trap;
3944 	}
3945 
3946 	ecmp_size = nhgi->sum_norm_weight;
3947 	err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
3948 	if (err)
3949 		/* No valid allocation size available. */
3950 		goto set_trap;
3951 
3952 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3953 				  ecmp_size, &adj_index);
3954 	if (err) {
3955 		/* We ran out of KVD linear space, just set the
3956 		 * trap and let everything flow through kernel.
3957 		 */
3958 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
3959 		goto set_trap;
3960 	}
3961 	old_adj_index_valid = nhgi->adj_index_valid;
3962 	old_adj_index = nhgi->adj_index;
3963 	old_ecmp_size = nhgi->ecmp_size;
3964 	nhgi->adj_index_valid = 1;
3965 	nhgi->adj_index = adj_index;
3966 	nhgi->ecmp_size = ecmp_size;
3967 	mlxsw_sp_nexthop_group_rebalance(nhgi);
3968 	err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nhgi, true);
3969 	if (err) {
3970 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3971 		goto set_trap;
3972 	}
3973 
3974 	mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
3975 
3976 	if (!old_adj_index_valid) {
3977 		/* The trap was set for the fib entries, so we have to call
3978 		 * fib entry update to unset it and use the adjacency index.
3979 		 */
3980 		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3981 		if (err) {
3982 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
3983 			goto set_trap;
3984 		}
3985 		return 0;
3986 	}
3987 
3988 	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
3989 					     old_adj_index, old_ecmp_size);
3990 	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3991 			   old_ecmp_size, old_adj_index);
3992 	if (err) {
3993 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
3994 		goto set_trap;
3995 	}
3996 
3997 	return 0;
3998 
3999 set_trap:
4000 	old_adj_index_valid = nhgi->adj_index_valid;
4001 	nhgi->adj_index_valid = 0;
4002 	for (i = 0; i < nhgi->count; i++) {
4003 		nh = &nhgi->nexthops[i];
4004 		nh->offloaded = 0;
4005 	}
4006 	err2 = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
4007 	if (err2)
4008 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
4009 	mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
4010 	if (old_adj_index_valid)
4011 		mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
4012 				   nhgi->ecmp_size, nhgi->adj_index);
4013 	return err;
4014 }
4015 
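/* Derive the nexthop action from the neighbour state: a resolved
 * neighbour forwards; an unresolved one in a resilient group traps to
 * the CPU, since its buckets must stay populated; in a regular group
 * it is simply not offloaded.
 */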
4016 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
4017 					    bool removing)
4018 {
4019 	if (!removing) {
4020 		nh->action = MLXSW_SP_NEXTHOP_ACTION_FORWARD;
4021 		nh->should_offload = 1;
4022 	} else if (nh->nhgi->is_resilient) {
4023 		nh->action = MLXSW_SP_NEXTHOP_ACTION_TRAP;
4024 		nh->should_offload = 1;
4025 	} else {
4026 		nh->should_offload = 0;
4027 	}
4028 	nh->update = 1;
4029 }
4030 
4031 static int
4032 mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp,
4033 				    struct mlxsw_sp_neigh_entry *neigh_entry)
4034 {
4035 	struct neighbour *n, *old_n = neigh_entry->key.n;
4036 	struct mlxsw_sp_nexthop *nh;
4037 	struct net_device *dev;
4038 	bool entry_connected;
4039 	u8 nud_state, dead;
4040 	int err;
4041 
4042 	nh = list_first_entry(&neigh_entry->nexthop_list,
4043 			      struct mlxsw_sp_nexthop, neigh_list_node);
4044 	dev = mlxsw_sp_nexthop_dev(nh);
4045 
4046 	n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, dev);
4047 	if (!n) {
4048 		n = neigh_create(nh->neigh_tbl, &nh->gw_addr, dev);
4049 		if (IS_ERR(n))
4050 			return PTR_ERR(n);
4051 		neigh_event_send(n, NULL);
4052 	}
4053 
4054 	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
4055 	neigh_entry->key.n = n;
4056 	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
4057 	if (err)
4058 		goto err_neigh_entry_insert;
4059 
4060 	read_lock_bh(&n->lock);
4061 	nud_state = n->nud_state;
4062 	dead = n->dead;
4063 	read_unlock_bh(&n->lock);
4064 	entry_connected = nud_state & NUD_VALID && !dead;
4065 
4066 	list_for_each_entry(nh, &neigh_entry->nexthop_list,
4067 			    neigh_list_node) {
4068 		neigh_release(old_n);
4069 		neigh_clone(n);
4070 		__mlxsw_sp_nexthop_neigh_update(nh, !entry_connected);
4071 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4072 	}
4073 
4074 	neigh_release(n);
4075 
4076 	return 0;
4077 
4078 err_neigh_entry_insert:
4079 	neigh_entry->key.n = old_n;
4080 	mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
4081 	neigh_release(n);
4082 	return err;
4083 }
4084 
4085 static void
4086 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
4087 			      struct mlxsw_sp_neigh_entry *neigh_entry,
4088 			      bool removing, bool dead)
4089 {
4090 	struct mlxsw_sp_nexthop *nh;
4091 
4092 	if (list_empty(&neigh_entry->nexthop_list))
4093 		return;
4094 
4095 	if (dead) {
4096 		int err;
4097 
4098 		err = mlxsw_sp_nexthop_dead_neigh_replace(mlxsw_sp,
4099 							  neigh_entry);
4100 		if (err)
4101 			dev_err(mlxsw_sp->bus_info->dev, "Failed to replace dead neigh\n");
4102 		return;
4103 	}
4104 
4105 	list_for_each_entry(nh, &neigh_entry->nexthop_list,
4106 			    neigh_list_node) {
4107 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
4108 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4109 	}
4110 }
4111 
4112 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
4113 				      struct mlxsw_sp_rif *rif)
4114 {
4115 	if (nh->rif)
4116 		return;
4117 
4118 	nh->rif = rif;
4119 	list_add(&nh->rif_list_node, &rif->nexthop_list);
4120 }
4121 
4122 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
4123 {
4124 	if (!nh->rif)
4125 		return;
4126 
4127 	list_del(&nh->rif_list_node);
4128 	nh->rif = NULL;
4129 }
4130 
4131 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
4132 				       struct mlxsw_sp_nexthop *nh)
4133 {
4134 	struct mlxsw_sp_neigh_entry *neigh_entry;
4135 	struct net_device *dev;
4136 	struct neighbour *n;
4137 	u8 nud_state, dead;
4138 	int err;
4139 
4140 	if (!nh->nhgi->gateway || nh->neigh_entry)
4141 		return 0;
4142 	dev = mlxsw_sp_nexthop_dev(nh);
4143 
4144 	/* Take a reference on the neighbour to ensure it is not
4145 	 * destroyed before the nexthop entry is finished with it.
4146 	 * The reference is taken either by neigh_lookup() or, if the
4147 	 * neighbour is not found, by neigh_create().
4148 	 */
4149 	n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, dev);
4150 	if (!n) {
4151 		n = neigh_create(nh->neigh_tbl, &nh->gw_addr, dev);
4152 		if (IS_ERR(n))
4153 			return PTR_ERR(n);
4154 		neigh_event_send(n, NULL);
4155 	}
4156 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
4157 	if (!neigh_entry) {
4158 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
4159 		if (IS_ERR(neigh_entry)) {
4160 			err = -EINVAL;
4161 			goto err_neigh_entry_create;
4162 		}
4163 	}
4164 
4165 	/* If that is the first nexthop connected to that neigh, add to
4166 	 * nexthop_neighs_list
4167 	 */
4168 	if (list_empty(&neigh_entry->nexthop_list))
4169 		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
4170 			      &mlxsw_sp->router->nexthop_neighs_list);
4171 
4172 	nh->neigh_entry = neigh_entry;
4173 	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
4174 	read_lock_bh(&n->lock);
4175 	nud_state = n->nud_state;
4176 	dead = n->dead;
4177 	read_unlock_bh(&n->lock);
4178 	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
4179 
4180 	return 0;
4181 
4182 err_neigh_entry_create:
4183 	neigh_release(n);
4184 	return err;
4185 }
4186 
4187 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
4188 					struct mlxsw_sp_nexthop *nh)
4189 {
4190 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
4191 	struct neighbour *n;
4192 
4193 	if (!neigh_entry)
4194 		return;
4195 	n = neigh_entry->key.n;
4196 
4197 	__mlxsw_sp_nexthop_neigh_update(nh, true);
4198 	list_del(&nh->neigh_list_node);
4199 	nh->neigh_entry = NULL;
4200 
4201 	/* If that is the last nexthop connected to that neigh, remove from
4202 	 * nexthop_neighs_list
4203 	 */
4204 	if (list_empty(&neigh_entry->nexthop_list))
4205 		list_del(&neigh_entry->nexthop_neighs_list_node);
4206 
4207 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
4208 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
4209 
4210 	neigh_release(n);
4211 }
4212 
4213 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
4214 {
4215 	struct net_device *ul_dev;
4216 	bool is_up;
4217 
4218 	rcu_read_lock();
4219 	ul_dev = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
4220 	is_up = ul_dev ? (ul_dev->flags & IFF_UP) : true;
4221 	rcu_read_unlock();
4222 
4223 	return is_up;
4224 }
4225 
4226 static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
4227 				       struct mlxsw_sp_nexthop *nh,
4228 				       struct mlxsw_sp_ipip_entry *ipip_entry)
4229 {
4230 	bool removing;
4231 
4232 	if (!nh->nhgi->gateway || nh->ipip_entry)
4233 		return;
4234 
4235 	nh->ipip_entry = ipip_entry;
4236 	removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
4237 	__mlxsw_sp_nexthop_neigh_update(nh, removing);
4238 	mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common);
4239 }
4240 
4241 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
4242 				       struct mlxsw_sp_nexthop *nh)
4243 {
4244 	struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
4245 
4246 	if (!ipip_entry)
4247 		return;
4248 
4249 	__mlxsw_sp_nexthop_neigh_update(nh, true);
4250 	nh->ipip_entry = NULL;
4251 }
4252 
4253 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
4254 					const struct fib_nh *fib_nh,
4255 					enum mlxsw_sp_ipip_type *p_ipipt)
4256 {
4257 	struct net_device *dev = fib_nh->fib_nh_dev;
4258 
4259 	return dev &&
4260 	       fib_nh->nh_parent->fib_type == RTN_UNICAST &&
4261 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
4262 }
4263 
4264 static int mlxsw_sp_nexthop_type_init(struct mlxsw_sp *mlxsw_sp,
4265 				      struct mlxsw_sp_nexthop *nh,
4266 				      const struct net_device *dev)
4267 {
4268 	const struct mlxsw_sp_ipip_ops *ipip_ops;
4269 	struct mlxsw_sp_ipip_entry *ipip_entry;
4270 	struct mlxsw_sp_rif *rif;
4271 	int err;
4272 
4273 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
4274 	if (ipip_entry) {
4275 		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4276 		if (ipip_ops->can_offload(mlxsw_sp, dev)) {
4277 			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
4278 			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
4279 			return 0;
4280 		}
4281 	}
4282 
4283 	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
4284 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
4285 	if (!rif)
4286 		return 0;
4287 
4288 	mlxsw_sp_nexthop_rif_init(nh, rif);
4289 	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
4290 	if (err)
4291 		goto err_neigh_init;
4292 
4293 	return 0;
4294 
4295 err_neigh_init:
4296 	mlxsw_sp_nexthop_rif_fini(nh);
4297 	return err;
4298 }
4299 
4300 static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
4301 				       struct mlxsw_sp_nexthop *nh)
4302 {
4303 	switch (nh->type) {
4304 	case MLXSW_SP_NEXTHOP_TYPE_ETH:
4305 		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
4306 		mlxsw_sp_nexthop_rif_fini(nh);
4307 		break;
4308 	case MLXSW_SP_NEXTHOP_TYPE_IPIP:
4309 		mlxsw_sp_nexthop_rif_fini(nh);
4310 		mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
4311 		break;
4312 	}
4313 }
4314 
4315 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
4316 				  struct mlxsw_sp_nexthop_group *nh_grp,
4317 				  struct mlxsw_sp_nexthop *nh,
4318 				  struct fib_nh *fib_nh)
4319 {
4320 	struct net_device *dev = fib_nh->fib_nh_dev;
4321 	struct in_device *in_dev;
4322 	int err;
4323 
4324 	nh->nhgi = nh_grp->nhgi;
4325 	nh->key.fib_nh = fib_nh;
4326 #ifdef CONFIG_IP_ROUTE_MULTIPATH
4327 	nh->nh_weight = fib_nh->fib_nh_weight;
4328 #else
4329 	nh->nh_weight = 1;
4330 #endif
4331 	memcpy(&nh->gw_addr, &fib_nh->fib_nh_gw4, sizeof(fib_nh->fib_nh_gw4));
4332 	nh->neigh_tbl = &arp_tbl;
4333 	err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
4334 	if (err)
4335 		return err;
4336 
4337 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
4338 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
4339 
4340 	if (!dev)
4341 		return 0;
4342 	nh->ifindex = dev->ifindex;
4343 
4344 	rcu_read_lock();
4345 	in_dev = __in_dev_get_rcu(dev);
4346 	if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
4347 	    fib_nh->fib_nh_flags & RTNH_F_LINKDOWN) {
4348 		rcu_read_unlock();
4349 		return 0;
4350 	}
4351 	rcu_read_unlock();
4352 
4353 	err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
4354 	if (err)
4355 		goto err_nexthop_neigh_init;
4356 
4357 	return 0;
4358 
4359 err_nexthop_neigh_init:
4360 	list_del(&nh->router_list_node);
4361 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
4362 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
4363 	return err;
4364 }
4365 
4366 static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
4367 				   struct mlxsw_sp_nexthop *nh)
4368 {
4369 	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4370 	list_del(&nh->router_list_node);
4371 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
4372 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
4373 }
4374 
4375 static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
4376 				    unsigned long event, struct fib_nh *fib_nh)
4377 {
4378 	struct mlxsw_sp_nexthop_key key;
4379 	struct mlxsw_sp_nexthop *nh;
4380 
4381 	key.fib_nh = fib_nh;
4382 	nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
4383 	if (!nh)
4384 		return;
4385 
4386 	switch (event) {
4387 	case FIB_EVENT_NH_ADD:
4388 		mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, fib_nh->fib_nh_dev);
4389 		break;
4390 	case FIB_EVENT_NH_DEL:
4391 		mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4392 		break;
4393 	}
4394 
4395 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4396 }
4397 
4398 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
4399 					struct mlxsw_sp_rif *rif)
4400 {
4401 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
4402 	struct mlxsw_sp_nexthop *nh;
4403 	bool removing;
4404 
4405 	list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
4406 		switch (nh->type) {
4407 		case MLXSW_SP_NEXTHOP_TYPE_ETH:
4408 			removing = false;
4409 			break;
4410 		case MLXSW_SP_NEXTHOP_TYPE_IPIP:
4411 			removing = !mlxsw_sp_ipip_netdev_ul_up(dev);
4412 			break;
4413 		default:
4414 			WARN_ON(1);
4415 			continue;
4416 		}
4417 
4418 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
4419 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4420 	}
4421 }
4422 
4423 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
4424 					 struct mlxsw_sp_rif *old_rif,
4425 					 struct mlxsw_sp_rif *new_rif)
4426 {
4427 	struct mlxsw_sp_nexthop *nh;
4428 
4429 	list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list);
4430 	list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node)
4431 		nh->rif = new_rif;
4432 	mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
4433 }
4434 
4435 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
4436 					   struct mlxsw_sp_rif *rif)
4437 {
4438 	struct mlxsw_sp_nexthop *nh, *tmp;
4439 
4440 	list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
4441 		mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4442 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4443 	}
4444 }
4445 
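/* Allocate a single adjacency entry that traps packets to the CPU,
 * bound to the loopback RIF created during init. It is reference
 * counted ('num_groups' below), so it is only present while at least
 * one nexthop group exists.
 */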
4446 static int mlxsw_sp_adj_trap_entry_init(struct mlxsw_sp *mlxsw_sp)
4447 {
4448 	enum mlxsw_reg_ratr_trap_action trap_action;
4449 	char ratr_pl[MLXSW_REG_RATR_LEN];
4450 	int err;
4451 
4452 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
4453 				  &mlxsw_sp->router->adj_trap_index);
4454 	if (err)
4455 		return err;
4456 
4457 	trap_action = MLXSW_REG_RATR_TRAP_ACTION_TRAP;
4458 	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, true,
4459 			    MLXSW_REG_RATR_TYPE_ETHERNET,
4460 			    mlxsw_sp->router->adj_trap_index,
4461 			    mlxsw_sp->router->lb_rif_index);
4462 	mlxsw_reg_ratr_trap_action_set(ratr_pl, trap_action);
4463 	mlxsw_reg_ratr_trap_id_set(ratr_pl, MLXSW_TRAP_ID_RTR_EGRESS0);
4464 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
4465 	if (err)
4466 		goto err_ratr_write;
4467 
4468 	return 0;
4469 
4470 err_ratr_write:
4471 	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
4472 			   mlxsw_sp->router->adj_trap_index);
4473 	return err;
4474 }
4475 
4476 static void mlxsw_sp_adj_trap_entry_fini(struct mlxsw_sp *mlxsw_sp)
4477 {
4478 	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
4479 			   mlxsw_sp->router->adj_trap_index);
4480 }
4481 
4482 static int mlxsw_sp_nexthop_group_inc(struct mlxsw_sp *mlxsw_sp)
4483 {
4484 	int err;
4485 
4486 	if (refcount_inc_not_zero(&mlxsw_sp->router->num_groups))
4487 		return 0;
4488 
4489 	err = mlxsw_sp_adj_trap_entry_init(mlxsw_sp);
4490 	if (err)
4491 		return err;
4492 
4493 	refcount_set(&mlxsw_sp->router->num_groups, 1);
4494 
4495 	return 0;
4496 }
4497 
4498 static void mlxsw_sp_nexthop_group_dec(struct mlxsw_sp *mlxsw_sp)
4499 {
4500 	if (!refcount_dec_and_test(&mlxsw_sp->router->num_groups))
4501 		return;
4502 
4503 	mlxsw_sp_adj_trap_entry_fini(mlxsw_sp);
4504 }
4505 
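/* Resilient groups: read the activity vector of the group's adjacency
 * range via RATRAD into a bitmap. The result is reported to the
 * nexthop core, which uses it to age out idle buckets; the query is
 * re-run periodically by the work item below while a resilient group
 * exists.
 */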
4506 static void
4507 mlxsw_sp_nh_grp_activity_get(struct mlxsw_sp *mlxsw_sp,
4508 			     const struct mlxsw_sp_nexthop_group *nh_grp,
4509 			     unsigned long *activity)
4510 {
4511 	char *ratrad_pl;
4512 	int i, err;
4513 
4514 	ratrad_pl = kmalloc(MLXSW_REG_RATRAD_LEN, GFP_KERNEL);
4515 	if (!ratrad_pl)
4516 		return;
4517 
4518 	mlxsw_reg_ratrad_pack(ratrad_pl, nh_grp->nhgi->adj_index,
4519 			      nh_grp->nhgi->count);
4520 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ratrad), ratrad_pl);
4521 	if (err)
4522 		goto out;
4523 
4524 	for (i = 0; i < nh_grp->nhgi->count; i++) {
4525 		if (!mlxsw_reg_ratrad_activity_vector_get(ratrad_pl, i))
4526 			continue;
4527 		bitmap_set(activity, i, 1);
4528 	}
4529 
4530 out:
4531 	kfree(ratrad_pl);
4532 }
4533 
4534 #define MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL 1000 /* ms */
4535 
4536 static void
4537 mlxsw_sp_nh_grp_activity_update(struct mlxsw_sp *mlxsw_sp,
4538 				const struct mlxsw_sp_nexthop_group *nh_grp)
4539 {
4540 	unsigned long *activity;
4541 
4542 	activity = bitmap_zalloc(nh_grp->nhgi->count, GFP_KERNEL);
4543 	if (!activity)
4544 		return;
4545 
4546 	mlxsw_sp_nh_grp_activity_get(mlxsw_sp, nh_grp, activity);
4547 	nexthop_res_grp_activity_update(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id,
4548 					nh_grp->nhgi->count, activity);
4549 
4550 	bitmap_free(activity);
4551 }
4552 
4553 static void
4554 mlxsw_sp_nh_grp_activity_work_schedule(struct mlxsw_sp *mlxsw_sp)
4555 {
4556 	unsigned int interval = MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL;
4557 
4558 	mlxsw_core_schedule_dw(&mlxsw_sp->router->nh_grp_activity_dw,
4559 			       msecs_to_jiffies(interval));
4560 }
4561 
4562 static void mlxsw_sp_nh_grp_activity_work(struct work_struct *work)
4563 {
4564 	struct mlxsw_sp_nexthop_group_info *nhgi;
4565 	struct mlxsw_sp_router *router;
4566 	bool reschedule = false;
4567 
4568 	router = container_of(work, struct mlxsw_sp_router,
4569 			      nh_grp_activity_dw.work);
4570 
4571 	mutex_lock(&router->lock);
4572 
4573 	list_for_each_entry(nhgi, &router->nh_res_grp_list, list) {
4574 		mlxsw_sp_nh_grp_activity_update(router->mlxsw_sp, nhgi->nh_grp);
4575 		reschedule = true;
4576 	}
4577 
4578 	mutex_unlock(&router->lock);
4579 
4580 	if (!reschedule)
4581 		return;
4582 	mlxsw_sp_nh_grp_activity_work_schedule(router->mlxsw_sp);
4583 }
4584 
4585 static int
4586 mlxsw_sp_nexthop_obj_single_validate(struct mlxsw_sp *mlxsw_sp,
4587 				     const struct nh_notifier_single_info *nh,
4588 				     struct netlink_ext_ack *extack)
4589 {
4590 	int err = -EINVAL;
4591 
4592 	if (nh->is_fdb)
4593 		NL_SET_ERR_MSG_MOD(extack, "FDB nexthops are not supported");
4594 	else if (nh->has_encap)
4595 		NL_SET_ERR_MSG_MOD(extack, "Encapsulating nexthops are not supported");
4596 	else
4597 		err = 0;
4598 
4599 	return err;
4600 }
4601 
4602 static int
4603 mlxsw_sp_nexthop_obj_group_entry_validate(struct mlxsw_sp *mlxsw_sp,
4604 					  const struct nh_notifier_single_info *nh,
4605 					  struct netlink_ext_ack *extack)
4606 {
4607 	int err;
4608 
4609 	err = mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, nh, extack);
4610 	if (err)
4611 		return err;
4612 
4613 	/* Device-only nexthops with an IPIP device are programmed as
4614 	 * encapsulating adjacency entries.
4615 	 */
4616 	if (!nh->gw_family && !nh->is_reject &&
4617 	    !mlxsw_sp_netdev_ipip_type(mlxsw_sp, nh->dev, NULL)) {
4618 		NL_SET_ERR_MSG_MOD(extack, "Nexthop group entry does not have a gateway");
4619 		return -EINVAL;
4620 	}
4621 
4622 	return 0;
4623 }
4624 
4625 static int
4626 mlxsw_sp_nexthop_obj_group_validate(struct mlxsw_sp *mlxsw_sp,
4627 				    const struct nh_notifier_grp_info *nh_grp,
4628 				    struct netlink_ext_ack *extack)
4629 {
4630 	int i;
4631 
4632 	if (nh_grp->is_fdb) {
4633 		NL_SET_ERR_MSG_MOD(extack, "FDB nexthop groups are not supported");
4634 		return -EINVAL;
4635 	}
4636 
4637 	for (i = 0; i < nh_grp->num_nh; i++) {
4638 		const struct nh_notifier_single_info *nh;
4639 		int err;
4640 
4641 		nh = &nh_grp->nh_entries[i].nh;
4642 		err = mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh,
4643 								extack);
4644 		if (err)
4645 			return err;
4646 	}
4647 
4648 	return 0;
4649 }
4650 
4651 static int
4652 mlxsw_sp_nexthop_obj_res_group_size_validate(struct mlxsw_sp *mlxsw_sp,
4653 					     const struct nh_notifier_res_table_info *nh_res_table,
4654 					     struct netlink_ext_ack *extack)
4655 {
4656 	unsigned int alloc_size;
4657 	bool valid_size = false;
4658 	int err, i;
4659 
4660 	if (nh_res_table->num_nh_buckets < 32) {
4661 		NL_SET_ERR_MSG_MOD(extack, "Minimum number of buckets is 32");
4662 		return -EINVAL;
4663 	}
4664 
4665 	for (i = 0; i < mlxsw_sp->router->adj_grp_size_ranges_count; i++) {
4666 		const struct mlxsw_sp_adj_grp_size_range *size_range;
4667 
4668 		size_range = &mlxsw_sp->router->adj_grp_size_ranges[i];
4669 
4670 		if (nh_res_table->num_nh_buckets >= size_range->start &&
4671 		    nh_res_table->num_nh_buckets <= size_range->end) {
4672 			valid_size = true;
4673 			break;
4674 		}
4675 	}
4676 
4677 	if (!valid_size) {
4678 		NL_SET_ERR_MSG_MOD(extack, "Invalid number of buckets");
4679 		return -EINVAL;
4680 	}
4681 
4682 	err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
4683 					      MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
4684 					      nh_res_table->num_nh_buckets,
4685 					      &alloc_size);
4686 	if (err || nh_res_table->num_nh_buckets != alloc_size) {
4687 		NL_SET_ERR_MSG_MOD(extack, "Number of buckets does not fit allocation size of any KVDL partition");
4688 		return -EINVAL;
4689 	}
4690 
4691 	return 0;
4692 }
4693 
4694 static int
4695 mlxsw_sp_nexthop_obj_res_group_validate(struct mlxsw_sp *mlxsw_sp,
4696 					const struct nh_notifier_res_table_info *nh_res_table,
4697 					struct netlink_ext_ack *extack)
4698 {
4699 	int err;
4700 	u16 i;
4701 
4702 	err = mlxsw_sp_nexthop_obj_res_group_size_validate(mlxsw_sp,
4703 							   nh_res_table,
4704 							   extack);
4705 	if (err)
4706 		return err;
4707 
4708 	for (i = 0; i < nh_res_table->num_nh_buckets; i++) {
4709 		const struct nh_notifier_single_info *nh;
4710 		int err;
4711 
4712 		nh = &nh_res_table->nhs[i];
4713 		err = mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh,
4714 								extack);
4715 		if (err)
4716 			return err;
4717 	}
4718 
4719 	return 0;
4720 }
4721 
4722 static int mlxsw_sp_nexthop_obj_validate(struct mlxsw_sp *mlxsw_sp,
4723 					 unsigned long event,
4724 					 struct nh_notifier_info *info)
4725 {
4726 	struct nh_notifier_single_info *nh;
4727 
4728 	if (event != NEXTHOP_EVENT_REPLACE &&
4729 	    event != NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE &&
4730 	    event != NEXTHOP_EVENT_BUCKET_REPLACE)
4731 		return 0;
4732 
4733 	switch (info->type) {
4734 	case NH_NOTIFIER_INFO_TYPE_SINGLE:
4735 		return mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, info->nh,
4736 							    info->extack);
4737 	case NH_NOTIFIER_INFO_TYPE_GRP:
4738 		return mlxsw_sp_nexthop_obj_group_validate(mlxsw_sp,
4739 							   info->nh_grp,
4740 							   info->extack);
4741 	case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
4742 		return mlxsw_sp_nexthop_obj_res_group_validate(mlxsw_sp,
4743 							       info->nh_res_table,
4744 							       info->extack);
4745 	case NH_NOTIFIER_INFO_TYPE_RES_BUCKET:
4746 		nh = &info->nh_res_bucket->new_nh;
4747 		return mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh,
4748 								 info->extack);
4749 	default:
4750 		NL_SET_ERR_MSG_MOD(info->extack, "Unsupported nexthop type");
4751 		return -EOPNOTSUPP;
4752 	}
4753 }
4754 
4755 static bool mlxsw_sp_nexthop_obj_is_gateway(struct mlxsw_sp *mlxsw_sp,
4756 					    const struct nh_notifier_info *info)
4757 {
4758 	const struct net_device *dev;
4759 
4760 	switch (info->type) {
4761 	case NH_NOTIFIER_INFO_TYPE_SINGLE:
4762 		dev = info->nh->dev;
4763 		return info->nh->gw_family || info->nh->is_reject ||
4764 		       mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
4765 	case NH_NOTIFIER_INFO_TYPE_GRP:
4766 	case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
4767 		/* Already validated earlier. */
4768 		return true;
4769 	default:
4770 		return false;
4771 	}
4772 }
4773 
4774 static void mlxsw_sp_nexthop_obj_blackhole_init(struct mlxsw_sp *mlxsw_sp,
4775 						struct mlxsw_sp_nexthop *nh)
4776 {
4777 	u16 lb_rif_index = mlxsw_sp->router->lb_rif_index;
4778 
4779 	nh->action = MLXSW_SP_NEXTHOP_ACTION_DISCARD;
4780 	nh->should_offload = 1;
4781 	/* While nexthops that discard packets do not forward packets
4782 	 * via an egress RIF, they still need to be programmed using a
4783 	 * valid RIF, so use the loopback RIF created during init.
4784 	 */
4785 	nh->rif = mlxsw_sp->router->rifs[lb_rif_index];
4786 }
4787 
4788 static void mlxsw_sp_nexthop_obj_blackhole_fini(struct mlxsw_sp *mlxsw_sp,
4789 						struct mlxsw_sp_nexthop *nh)
4790 {
4791 	nh->rif = NULL;
4792 	nh->should_offload = 0;
4793 }
4794 
4795 static int
4796 mlxsw_sp_nexthop_obj_init(struct mlxsw_sp *mlxsw_sp,
4797 			  struct mlxsw_sp_nexthop_group *nh_grp,
4798 			  struct mlxsw_sp_nexthop *nh,
4799 			  struct nh_notifier_single_info *nh_obj, int weight)
4800 {
4801 	struct net_device *dev = nh_obj->dev;
4802 	int err;
4803 
4804 	nh->nhgi = nh_grp->nhgi;
4805 	nh->nh_weight = weight;
4806 
4807 	switch (nh_obj->gw_family) {
4808 	case AF_INET:
4809 		memcpy(&nh->gw_addr, &nh_obj->ipv4, sizeof(nh_obj->ipv4));
4810 		nh->neigh_tbl = &arp_tbl;
4811 		break;
4812 	case AF_INET6:
4813 		memcpy(&nh->gw_addr, &nh_obj->ipv6, sizeof(nh_obj->ipv6));
4814 #if IS_ENABLED(CONFIG_IPV6)
4815 		nh->neigh_tbl = &nd_tbl;
4816 #endif
4817 		break;
4818 	}
4819 
4820 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
4821 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
4822 	nh->ifindex = dev->ifindex;
4823 
4824 	err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
4825 	if (err)
4826 		goto err_type_init;
4827 
4828 	if (nh_obj->is_reject)
4829 		mlxsw_sp_nexthop_obj_blackhole_init(mlxsw_sp, nh);
4830 
4831 	/* In a resilient nexthop group, all the nexthops must be written to
4832 	 * the adjacency table, even if they do not have a valid neighbour or
4833 	 * RIF.
4834 	 */
4835 	if (nh_grp->nhgi->is_resilient && !nh->should_offload) {
4836 		nh->action = MLXSW_SP_NEXTHOP_ACTION_TRAP;
4837 		nh->should_offload = 1;
4838 	}
4839 
4840 	return 0;
4841 
4842 err_type_init:
4843 	list_del(&nh->router_list_node);
4844 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
4845 	return err;
4846 }
4847 
4848 static void mlxsw_sp_nexthop_obj_fini(struct mlxsw_sp *mlxsw_sp,
4849 				      struct mlxsw_sp_nexthop *nh)
4850 {
4851 	if (nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD)
4852 		mlxsw_sp_nexthop_obj_blackhole_fini(mlxsw_sp, nh);
4853 	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4854 	list_del(&nh->router_list_node);
4855 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
4856 	nh->should_offload = 0;
4857 }
4858 
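/* Map a nexthop object notification to group info: a single nexthop
 * becomes a group of one, a multipath group contributes one nexthop
 * per entry (with its weight), and a resilient table contributes one
 * nexthop per bucket with a weight of one.
 */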
4859 static int
4860 mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp,
4861 				     struct mlxsw_sp_nexthop_group *nh_grp,
4862 				     struct nh_notifier_info *info)
4863 {
4864 	struct mlxsw_sp_nexthop_group_info *nhgi;
4865 	struct mlxsw_sp_nexthop *nh;
4866 	bool is_resilient = false;
4867 	unsigned int nhs;
4868 	int err, i;
4869 
4870 	switch (info->type) {
4871 	case NH_NOTIFIER_INFO_TYPE_SINGLE:
4872 		nhs = 1;
4873 		break;
4874 	case NH_NOTIFIER_INFO_TYPE_GRP:
4875 		nhs = info->nh_grp->num_nh;
4876 		break;
4877 	case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
4878 		nhs = info->nh_res_table->num_nh_buckets;
4879 		is_resilient = true;
4880 		break;
4881 	default:
4882 		return -EINVAL;
4883 	}
4884 
4885 	nhgi = kzalloc(struct_size(nhgi, nexthops, nhs), GFP_KERNEL);
4886 	if (!nhgi)
4887 		return -ENOMEM;
4888 	nh_grp->nhgi = nhgi;
4889 	nhgi->nh_grp = nh_grp;
4890 	nhgi->gateway = mlxsw_sp_nexthop_obj_is_gateway(mlxsw_sp, info);
4891 	nhgi->is_resilient = is_resilient;
4892 	nhgi->count = nhs;
4893 	for (i = 0; i < nhgi->count; i++) {
4894 		struct nh_notifier_single_info *nh_obj;
4895 		int weight;
4896 
4897 		nh = &nhgi->nexthops[i];
4898 		switch (info->type) {
4899 		case NH_NOTIFIER_INFO_TYPE_SINGLE:
4900 			nh_obj = info->nh;
4901 			weight = 1;
4902 			break;
4903 		case NH_NOTIFIER_INFO_TYPE_GRP:
4904 			nh_obj = &info->nh_grp->nh_entries[i].nh;
4905 			weight = info->nh_grp->nh_entries[i].weight;
4906 			break;
4907 		case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
4908 			nh_obj = &info->nh_res_table->nhs[i];
4909 			weight = 1;
4910 			break;
4911 		default:
4912 			err = -EINVAL;
4913 			goto err_nexthop_obj_init;
4914 		}
4915 		err = mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj,
4916 						weight);
4917 		if (err)
4918 			goto err_nexthop_obj_init;
4919 	}
4920 	err = mlxsw_sp_nexthop_group_inc(mlxsw_sp);
4921 	if (err)
4922 		goto err_group_inc;
4923 	err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4924 	if (err) {
4925 		NL_SET_ERR_MSG_MOD(info->extack, "Failed to write adjacency entries to the device");
4926 		goto err_group_refresh;
4927 	}
4928 
4929 	/* Add resilient nexthop groups to a list so that the activity of their
4930 	 * nexthop buckets will be periodically queried and cleared.
4931 	 */
4932 	if (nhgi->is_resilient) {
4933 		if (list_empty(&mlxsw_sp->router->nh_res_grp_list))
4934 			mlxsw_sp_nh_grp_activity_work_schedule(mlxsw_sp);
4935 		list_add(&nhgi->list, &mlxsw_sp->router->nh_res_grp_list);
4936 	}
4937 
4938 	return 0;
4939 
4940 err_group_refresh:
4941 	mlxsw_sp_nexthop_group_dec(mlxsw_sp);
4942 err_group_inc:
4943 	i = nhgi->count;
4944 err_nexthop_obj_init:
4945 	for (i--; i >= 0; i--) {
4946 		nh = &nhgi->nexthops[i];
4947 		mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
4948 	}
4949 	kfree(nhgi);
4950 	return err;
4951 }
4952 
4953 static void
4954 mlxsw_sp_nexthop_obj_group_info_fini(struct mlxsw_sp *mlxsw_sp,
4955 				     struct mlxsw_sp_nexthop_group *nh_grp)
4956 {
4957 	struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
4958 	struct mlxsw_sp_router *router = mlxsw_sp->router;
4959 	int i;
4960 
4961 	if (nhgi->is_resilient) {
4962 		list_del(&nhgi->list);
4963 		if (list_empty(&mlxsw_sp->router->nh_res_grp_list))
4964 			cancel_delayed_work(&router->nh_grp_activity_dw);
4965 	}
4966 
4967 	mlxsw_sp_nexthop_group_dec(mlxsw_sp);
4968 	for (i = nhgi->count - 1; i >= 0; i--) {
4969 		struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
4970 
4971 		mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
4972 	}
4973 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4974 	WARN_ON_ONCE(nhgi->adj_index_valid);
4975 	kfree(nhgi);
4976 }
4977 
4978 static struct mlxsw_sp_nexthop_group *
4979 mlxsw_sp_nexthop_obj_group_create(struct mlxsw_sp *mlxsw_sp,
4980 				  struct nh_notifier_info *info)
4981 {
4982 	struct mlxsw_sp_nexthop_group *nh_grp;
4983 	int err;
4984 
4985 	nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL);
4986 	if (!nh_grp)
4987 		return ERR_PTR(-ENOMEM);
4988 	INIT_LIST_HEAD(&nh_grp->vr_list);
4989 	err = rhashtable_init(&nh_grp->vr_ht,
4990 			      &mlxsw_sp_nexthop_group_vr_ht_params);
4991 	if (err)
4992 		goto err_nexthop_group_vr_ht_init;
4993 	INIT_LIST_HEAD(&nh_grp->fib_list);
4994 	nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ;
4995 	nh_grp->obj.id = info->id;
4996 
4997 	err = mlxsw_sp_nexthop_obj_group_info_init(mlxsw_sp, nh_grp, info);
4998 	if (err)
4999 		goto err_nexthop_group_info_init;
5000 
5001 	nh_grp->can_destroy = false;
5002 
5003 	return nh_grp;
5004 
5005 err_nexthop_group_info_init:
5006 	rhashtable_destroy(&nh_grp->vr_ht);
5007 err_nexthop_group_vr_ht_init:
5008 	kfree(nh_grp);
5009 	return ERR_PTR(err);
5010 }
5011 
5012 static void
5013 mlxsw_sp_nexthop_obj_group_destroy(struct mlxsw_sp *mlxsw_sp,
5014 				   struct mlxsw_sp_nexthop_group *nh_grp)
5015 {
5016 	if (!nh_grp->can_destroy)
5017 		return;
5018 	mlxsw_sp_nexthop_obj_group_info_fini(mlxsw_sp, nh_grp);
5019 	WARN_ON_ONCE(!list_empty(&nh_grp->fib_list));
5020 	WARN_ON_ONCE(!list_empty(&nh_grp->vr_list));
5021 	rhashtable_destroy(&nh_grp->vr_ht);
5022 	kfree(nh_grp);
5023 }
5024 
5025 static struct mlxsw_sp_nexthop_group *
5026 mlxsw_sp_nexthop_obj_group_lookup(struct mlxsw_sp *mlxsw_sp, u32 id)
5027 {
5028 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
5029 
5030 	cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ;
5031 	cmp_arg.id = id;
5032 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
5033 				      &cmp_arg,
5034 				      mlxsw_sp_nexthop_group_ht_params);
5035 }
5036 
5037 static int mlxsw_sp_nexthop_obj_group_add(struct mlxsw_sp *mlxsw_sp,
5038 					  struct mlxsw_sp_nexthop_group *nh_grp)
5039 {
5040 	return mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
5041 }
5042 
5043 static int
5044 mlxsw_sp_nexthop_obj_group_replace(struct mlxsw_sp *mlxsw_sp,
5045 				   struct mlxsw_sp_nexthop_group *nh_grp,
5046 				   struct mlxsw_sp_nexthop_group *old_nh_grp,
5047 				   struct netlink_ext_ack *extack)
5048 {
5049 	struct mlxsw_sp_nexthop_group_info *old_nhgi = old_nh_grp->nhgi;
5050 	struct mlxsw_sp_nexthop_group_info *new_nhgi = nh_grp->nhgi;
5051 	int err;
5052 
5053 	old_nh_grp->nhgi = new_nhgi;
5054 	new_nhgi->nh_grp = old_nh_grp;
5055 	nh_grp->nhgi = old_nhgi;
5056 	old_nhgi->nh_grp = nh_grp;
5057 
5058 	if (old_nhgi->adj_index_valid && new_nhgi->adj_index_valid) {
5059 		/* Both the old adjacency index and the new one are valid.
5060 		 * Routes are currently using the old one. Tell the device to
5061 		 * replace the old adjacency index with the new one.
5062 		 */
5063 		err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, old_nh_grp,
5064 						     old_nhgi->adj_index,
5065 						     old_nhgi->ecmp_size);
5066 		if (err) {
5067 			NL_SET_ERR_MSG_MOD(extack, "Failed to replace old adjacency index with new one");
5068 			goto err_out;
5069 		}
5070 	} else if (old_nhgi->adj_index_valid && !new_nhgi->adj_index_valid) {
5071 		/* The old adjacency index is valid, while the new one is not.
5072 		 * Iterate over all the routes using the group and change them
5073 		 * to trap packets to the CPU.
5074 		 */
5075 		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, old_nh_grp);
5076 		if (err) {
5077 			NL_SET_ERR_MSG_MOD(extack, "Failed to update routes to trap packets");
5078 			goto err_out;
5079 		}
5080 	} else if (!old_nhgi->adj_index_valid && new_nhgi->adj_index_valid) {
5081 		/* The old adjacency index is invalid, while the new one is.
5082 		 * Iterate over all the routes using the group and change them
5083 		 * to forward packets using the new valid index.
5084 		 */
5085 		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, old_nh_grp);
5086 		if (err) {
5087 			NL_SET_ERR_MSG_MOD(extack, "Failed to update routes to forward packets");
5088 			goto err_out;
5089 		}
5090 	}
5091 
5092 	/* Make sure the flags are set / cleared based on the new nexthop group
5093 	 * information.
5094 	 */
5095 	mlxsw_sp_nexthop_obj_group_offload_refresh(mlxsw_sp, old_nh_grp);
5096 
5097 	/* At this point 'nh_grp' is just a shell that is not used by anyone
5098 	 * and its nexthop group info is the old info that was just replaced
5099 	 * with the new one. Remove it.
5100 	 */
5101 	nh_grp->can_destroy = true;
5102 	mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
5103 
5104 	return 0;
5105 
5106 err_out:
5107 	old_nhgi->nh_grp = old_nh_grp;
5108 	nh_grp->nhgi = new_nhgi;
5109 	new_nhgi->nh_grp = nh_grp;
5110 	old_nh_grp->nhgi = old_nhgi;
5111 	return err;
5112 }
5113 
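/* Replacement is create-then-swap: a new group is always created from
 * the notification and, if a group with the same ID already exists,
 * the two swap their group info, so routes keep pointing at the old
 * 'struct mlxsw_sp_nexthop_group' while using the new nexthops. The
 * emptied shell is then destroyed.
 */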
5114 static int mlxsw_sp_nexthop_obj_new(struct mlxsw_sp *mlxsw_sp,
5115 				    struct nh_notifier_info *info)
5116 {
5117 	struct mlxsw_sp_nexthop_group *nh_grp, *old_nh_grp;
5118 	struct netlink_ext_ack *extack = info->extack;
5119 	int err;
5120 
5121 	nh_grp = mlxsw_sp_nexthop_obj_group_create(mlxsw_sp, info);
5122 	if (IS_ERR(nh_grp))
5123 		return PTR_ERR(nh_grp);
5124 
5125 	old_nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
5126 	if (!old_nh_grp)
5127 		err = mlxsw_sp_nexthop_obj_group_add(mlxsw_sp, nh_grp);
5128 	else
5129 		err = mlxsw_sp_nexthop_obj_group_replace(mlxsw_sp, nh_grp,
5130 							 old_nh_grp, extack);
5131 
5132 	if (err) {
5133 		nh_grp->can_destroy = true;
5134 		mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
5135 	}
5136 
5137 	return err;
5138 }
5139 
5140 static void mlxsw_sp_nexthop_obj_del(struct mlxsw_sp *mlxsw_sp,
5141 				     struct nh_notifier_info *info)
5142 {
5143 	struct mlxsw_sp_nexthop_group *nh_grp;
5144 
5145 	nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
5146 	if (!nh_grp)
5147 		return;
5148 
5149 	nh_grp->can_destroy = true;
5150 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
5151 
5152 	/* If the group still has routes using it, then defer the delete
5153 	 * operation until the last route using it is deleted.
5154 	 */
5155 	if (!list_empty(&nh_grp->fib_list))
5156 		return;
5157 	mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
5158 }
5159 
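/* "Idle only" bucket replacement: the entry is written with the
 * write-on-activity opcode (see __mlxsw_sp_nexthop_eth_update above)
 * and then read back. If the read-back payload differs from the
 * requested one, the bucket was active during the replacement and the
 * failure is reported back through the notifier.
 */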
5160 static int mlxsw_sp_nexthop_obj_bucket_query(struct mlxsw_sp *mlxsw_sp,
5161 					     u32 adj_index, char *ratr_pl)
5162 {
5163 	MLXSW_REG_ZERO(ratr, ratr_pl);
5164 	mlxsw_reg_ratr_op_set(ratr_pl, MLXSW_REG_RATR_OP_QUERY_READ);
5165 	mlxsw_reg_ratr_adjacency_index_low_set(ratr_pl, adj_index);
5166 	mlxsw_reg_ratr_adjacency_index_high_set(ratr_pl, adj_index >> 16);
5167 
5168 	return mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
5169 }
5170 
5171 static int mlxsw_sp_nexthop_obj_bucket_compare(char *ratr_pl, char *ratr_pl_new)
5172 {
5173 	/* Clear the opcode and activity on both the old and new payloads as
5174 	 * they are irrelevant for the comparison.
5175 	 */
5176 	mlxsw_reg_ratr_op_set(ratr_pl, MLXSW_REG_RATR_OP_QUERY_READ);
5177 	mlxsw_reg_ratr_a_set(ratr_pl, 0);
5178 	mlxsw_reg_ratr_op_set(ratr_pl_new, MLXSW_REG_RATR_OP_QUERY_READ);
5179 	mlxsw_reg_ratr_a_set(ratr_pl_new, 0);
5180 
5181 	/* If the contents of the adjacency entry are consistent with the
5182 	 * replacement request, then replacement was successful.
5183 	 */
5184 	if (!memcmp(ratr_pl, ratr_pl_new, MLXSW_REG_RATR_LEN))
5185 		return 0;
5186 
5187 	return -EINVAL;
5188 }
5189 
5190 static int
5191 mlxsw_sp_nexthop_obj_bucket_adj_update(struct mlxsw_sp *mlxsw_sp,
5192 				       struct mlxsw_sp_nexthop *nh,
5193 				       struct nh_notifier_info *info)
5194 {
5195 	u16 bucket_index = info->nh_res_bucket->bucket_index;
5196 	struct netlink_ext_ack *extack = info->extack;
5197 	bool force = info->nh_res_bucket->force;
5198 	char ratr_pl_new[MLXSW_REG_RATR_LEN];
5199 	char ratr_pl[MLXSW_REG_RATR_LEN];
5200 	u32 adj_index;
5201 	int err;
5202 
5203 	/* No point in trying an atomic replacement if the idle timer interval
5204 	 * is smaller than the interval in which we query and clear activity.
5205 	 */
5206 	if (!force && info->nh_res_bucket->idle_timer_ms <
5207 	    MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL)
5208 		force = true;
5209 
5210 	adj_index = nh->nhgi->adj_index + bucket_index;
5211 	err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh, force, ratr_pl);
5212 	if (err) {
5213 		NL_SET_ERR_MSG_MOD(extack, "Failed to overwrite nexthop bucket");
5214 		return err;
5215 	}
5216 
5217 	if (!force) {
5218 		err = mlxsw_sp_nexthop_obj_bucket_query(mlxsw_sp, adj_index,
5219 							ratr_pl_new);
5220 		if (err) {
5221 			NL_SET_ERR_MSG_MOD(extack, "Failed to query nexthop bucket state after replacement. State might be inconsistent");
5222 			return err;
5223 		}
5224 
5225 		err = mlxsw_sp_nexthop_obj_bucket_compare(ratr_pl, ratr_pl_new);
5226 		if (err) {
5227 			NL_SET_ERR_MSG_MOD(extack, "Nexthop bucket was not replaced because it was active during replacement");
5228 			return err;
5229 		}
5230 	}
5231 
5232 	nh->update = 0;
5233 	nh->offloaded = 1;
5234 	mlxsw_sp_nexthop_bucket_offload_refresh(mlxsw_sp, nh, bucket_index);
5235 
5236 	return 0;
5237 }
5238 
5239 static int mlxsw_sp_nexthop_obj_bucket_replace(struct mlxsw_sp *mlxsw_sp,
5240 					       struct nh_notifier_info *info)
5241 {
5242 	u16 bucket_index = info->nh_res_bucket->bucket_index;
5243 	struct netlink_ext_ack *extack = info->extack;
5244 	struct mlxsw_sp_nexthop_group_info *nhgi;
5245 	struct nh_notifier_single_info *nh_obj;
5246 	struct mlxsw_sp_nexthop_group *nh_grp;
5247 	struct mlxsw_sp_nexthop *nh;
5248 	int err;
5249 
5250 	nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
5251 	if (!nh_grp) {
5252 		NL_SET_ERR_MSG_MOD(extack, "Nexthop group was not found");
5253 		return -EINVAL;
5254 	}
5255 
5256 	nhgi = nh_grp->nhgi;
5257 
5258 	if (bucket_index >= nhgi->count) {
5259 		NL_SET_ERR_MSG_MOD(extack, "Nexthop bucket index out of range");
5260 		return -EINVAL;
5261 	}
5262 
5263 	nh = &nhgi->nexthops[bucket_index];
5264 	mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
5265 
5266 	nh_obj = &info->nh_res_bucket->new_nh;
5267 	err = mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj, 1);
5268 	if (err) {
5269 		NL_SET_ERR_MSG_MOD(extack, "Failed to initialize nexthop object for nexthop bucket replacement");
5270 		goto err_nexthop_obj_init;
5271 	}
5272 
5273 	err = mlxsw_sp_nexthop_obj_bucket_adj_update(mlxsw_sp, nh, info);
5274 	if (err)
5275 		goto err_nexthop_obj_bucket_adj_update;
5276 
5277 	return 0;
5278 
5279 err_nexthop_obj_bucket_adj_update:
5280 	mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
5281 err_nexthop_obj_init:
5282 	nh_obj = &info->nh_res_bucket->old_nh;
5283 	mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj, 1);
5284 	/* The old adjacency entry was not overwritten */
5285 	nh->update = 0;
5286 	nh->offloaded = 1;
5287 	return err;
5288 }
5289 
5290 static int mlxsw_sp_nexthop_obj_event(struct notifier_block *nb,
5291 				      unsigned long event, void *ptr)
5292 {
5293 	struct nh_notifier_info *info = ptr;
5294 	struct mlxsw_sp_router *router;
5295 	int err = 0;
5296 
5297 	router = container_of(nb, struct mlxsw_sp_router, nexthop_nb);
5298 	err = mlxsw_sp_nexthop_obj_validate(router->mlxsw_sp, event, info);
5299 	if (err)
5300 		goto out;
5301 
5302 	mutex_lock(&router->lock);
5303 
5304 	switch (event) {
5305 	case NEXTHOP_EVENT_REPLACE:
5306 		err = mlxsw_sp_nexthop_obj_new(router->mlxsw_sp, info);
5307 		break;
5308 	case NEXTHOP_EVENT_DEL:
5309 		mlxsw_sp_nexthop_obj_del(router->mlxsw_sp, info);
5310 		break;
5311 	case NEXTHOP_EVENT_BUCKET_REPLACE:
5312 		err = mlxsw_sp_nexthop_obj_bucket_replace(router->mlxsw_sp,
5313 							  info);
5314 		break;
5315 	default:
5316 		break;
5317 	}
5318 
5319 	mutex_unlock(&router->lock);
5320 
5321 out:
5322 	return notifier_from_errno(err);
5323 }
5324 
5325 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
5326 				   struct fib_info *fi)
5327 {
5328 	const struct fib_nh *nh = fib_info_nh(fi, 0);
5329 
5330 	return nh->fib_nh_gw_family ||
5331 	       mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL);
5332 }
5333 
5334 static int
5335 mlxsw_sp_nexthop4_group_info_init(struct mlxsw_sp *mlxsw_sp,
5336 				  struct mlxsw_sp_nexthop_group *nh_grp)
5337 {
5338 	unsigned int nhs = fib_info_num_path(nh_grp->ipv4.fi);
5339 	struct mlxsw_sp_nexthop_group_info *nhgi;
5340 	struct mlxsw_sp_nexthop *nh;
5341 	int err, i;
5342 
5343 	nhgi = kzalloc(struct_size(nhgi, nexthops, nhs), GFP_KERNEL);
5344 	if (!nhgi)
5345 		return -ENOMEM;
5346 	nh_grp->nhgi = nhgi;
5347 	nhgi->nh_grp = nh_grp;
5348 	nhgi->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, nh_grp->ipv4.fi);
5349 	nhgi->count = nhs;
5350 	for (i = 0; i < nhgi->count; i++) {
5351 		struct fib_nh *fib_nh;
5352 
5353 		nh = &nhgi->nexthops[i];
5354 		fib_nh = fib_info_nh(nh_grp->ipv4.fi, i);
5355 		err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
5356 		if (err)
5357 			goto err_nexthop4_init;
5358 	}
5359 	err = mlxsw_sp_nexthop_group_inc(mlxsw_sp);
5360 	if (err)
5361 		goto err_group_inc;
5362 	err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5363 	if (err)
5364 		goto err_group_refresh;
5365 
5366 	return 0;
5367 
5368 err_group_refresh:
5369 	mlxsw_sp_nexthop_group_dec(mlxsw_sp);
5370 err_group_inc:
5371 	i = nhgi->count;
5372 err_nexthop4_init:
5373 	for (i--; i >= 0; i--) {
5374 		nh = &nhgi->nexthops[i];
5375 		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
5376 	}
5377 	kfree(nhgi);
5378 	return err;
5379 }
5380 
5381 static void
5382 mlxsw_sp_nexthop4_group_info_fini(struct mlxsw_sp *mlxsw_sp,
5383 				  struct mlxsw_sp_nexthop_group *nh_grp)
5384 {
5385 	struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
5386 	int i;
5387 
5388 	mlxsw_sp_nexthop_group_dec(mlxsw_sp);
5389 	for (i = nhgi->count - 1; i >= 0; i--) {
5390 		struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
5391 
5392 		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
5393 	}
5394 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5395 	WARN_ON_ONCE(nhgi->adj_index_valid);
5396 	kfree(nhgi);
5397 }
5398 
5399 static struct mlxsw_sp_nexthop_group *
5400 mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
5401 {
5402 	struct mlxsw_sp_nexthop_group *nh_grp;
5403 	int err;
5404 
5405 	nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL);
5406 	if (!nh_grp)
5407 		return ERR_PTR(-ENOMEM);
5408 	INIT_LIST_HEAD(&nh_grp->vr_list);
5409 	err = rhashtable_init(&nh_grp->vr_ht,
5410 			      &mlxsw_sp_nexthop_group_vr_ht_params);
5411 	if (err)
5412 		goto err_nexthop_group_vr_ht_init;
5413 	INIT_LIST_HEAD(&nh_grp->fib_list);
5414 	nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4;
5415 	nh_grp->ipv4.fi = fi;
5416 	fib_info_hold(fi);
5417 
5418 	err = mlxsw_sp_nexthop4_group_info_init(mlxsw_sp, nh_grp);
5419 	if (err)
5420 		goto err_nexthop_group_info_init;
5421 
5422 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
5423 	if (err)
5424 		goto err_nexthop_group_insert;
5425 
5426 	nh_grp->can_destroy = true;
5427 
5428 	return nh_grp;
5429 
5430 err_nexthop_group_insert:
5431 	mlxsw_sp_nexthop4_group_info_fini(mlxsw_sp, nh_grp);
5432 err_nexthop_group_info_init:
5433 	fib_info_put(fi);
5434 	rhashtable_destroy(&nh_grp->vr_ht);
5435 err_nexthop_group_vr_ht_init:
5436 	kfree(nh_grp);
5437 	return ERR_PTR(err);
5438 }
5439 
5440 static void
5441 mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
5442 				struct mlxsw_sp_nexthop_group *nh_grp)
5443 {
5444 	if (!nh_grp->can_destroy)
5445 		return;
5446 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
5447 	mlxsw_sp_nexthop4_group_info_fini(mlxsw_sp, nh_grp);
5448 	fib_info_put(nh_grp->ipv4.fi);
5449 	WARN_ON_ONCE(!list_empty(&nh_grp->vr_list));
5450 	rhashtable_destroy(&nh_grp->vr_ht);
5451 	kfree(nh_grp);
5452 }
5453 
5454 static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
5455 				       struct mlxsw_sp_fib_entry *fib_entry,
5456 				       struct fib_info *fi)
5457 {
5458 	struct mlxsw_sp_nexthop_group *nh_grp;
5459 
5460 	if (fi->nh) {
5461 		nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp,
5462 							   fi->nh->id);
5463 		if (WARN_ON_ONCE(!nh_grp))
5464 			return -EINVAL;
5465 		goto out;
5466 	}
5467 
5468 	nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
5469 	if (!nh_grp) {
5470 		nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
5471 		if (IS_ERR(nh_grp))
5472 			return PTR_ERR(nh_grp);
5473 	}
5474 out:
5475 	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
5476 	fib_entry->nh_group = nh_grp;
5477 	return 0;
5478 }
5479 
5480 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
5481 					struct mlxsw_sp_fib_entry *fib_entry)
5482 {
5483 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
5484 
5485 	list_del(&fib_entry->nexthop_group_node);
5486 	if (!list_empty(&nh_grp->fib_list))
5487 		return;
5488 
5489 	if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ) {
5490 		mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
5491 		return;
5492 	}
5493 
5494 	mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
5495 }
5496 
5497 static bool
5498 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
5499 {
5500 	struct mlxsw_sp_fib4_entry *fib4_entry;
5501 
5502 	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
5503 				  common);
5504 	return !fib4_entry->dscp;
5505 }
5506 
5507 static bool
5508 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
5509 {
5510 	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
5511 
5512 	switch (fib_entry->fib_node->fib->proto) {
5513 	case MLXSW_SP_L3_PROTO_IPV4:
5514 		if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
5515 			return false;
5516 		break;
5517 	case MLXSW_SP_L3_PROTO_IPV6:
5518 		break;
5519 	}
5520 
5521 	switch (fib_entry->type) {
5522 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
5523 		return !!nh_group->nhgi->adj_index_valid;
5524 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
5525 		return !!mlxsw_sp_nhgi_rif(nh_group->nhgi);
5526 	case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
5527 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
5528 	case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
5529 		return true;
5530 	default:
5531 		return false;
5532 	}
5533 }
5534 
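/* Find the nexthop in the group that corresponds to the given IPv6
 * route, by matching the egress device and the gateway address.
 */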
5535 static struct mlxsw_sp_nexthop *
5536 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
5537 		     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
5538 {
5539 	int i;
5540 
5541 	for (i = 0; i < nh_grp->nhgi->count; i++) {
5542 		struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i];
5543 		struct net_device *dev = mlxsw_sp_nexthop_dev(nh);
5544 		struct fib6_info *rt = mlxsw_sp_rt6->rt;
5545 
5546 		if (dev && dev == rt->fib6_nh->fib_nh_dev &&
5547 		    ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
5548 				    &rt->fib6_nh->fib_nh_gw6))
5549 			return nh;
5550 	}
5551 
5552 	return NULL;
5553 }
5554 
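/* Report an offload failure for the route back to the kernel's FIB, so
 * that the failure is reflected in route dumps to user space.
 */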
5555 static void
5556 mlxsw_sp_fib4_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp,
5557 				      struct fib_entry_notifier_info *fen_info)
5558 {
5559 	u32 *p_dst = (u32 *) &fen_info->dst;
5560 	struct fib_rt_info fri;
5561 
5562 	fri.fi = fen_info->fi;
5563 	fri.tb_id = fen_info->tb_id;
5564 	fri.dst = cpu_to_be32(*p_dst);
5565 	fri.dst_len = fen_info->dst_len;
5566 	fri.dscp = fen_info->dscp;
5567 	fri.type = fen_info->type;
5568 	fri.offload = false;
5569 	fri.trap = false;
5570 	fri.offload_failed = true;
5571 	fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
5572 }
5573 
5574 static void
5575 mlxsw_sp_fib4_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
5576 				 struct mlxsw_sp_fib_entry *fib_entry)
5577 {
5578 	u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr;
5579 	int dst_len = fib_entry->fib_node->key.prefix_len;
5580 	struct mlxsw_sp_fib4_entry *fib4_entry;
5581 	struct fib_rt_info fri;
5582 	bool should_offload;
5583 
5584 	should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry);
5585 	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
5586 				  common);
5587 	fri.fi = fib4_entry->fi;
5588 	fri.tb_id = fib4_entry->tb_id;
5589 	fri.dst = cpu_to_be32(*p_dst);
5590 	fri.dst_len = dst_len;
5591 	fri.dscp = fib4_entry->dscp;
5592 	fri.type = fib4_entry->type;
5593 	fri.offload = should_offload;
5594 	fri.trap = !should_offload;
5595 	fri.offload_failed = false;
5596 	fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
5597 }
5598 
5599 static void
5600 mlxsw_sp_fib4_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
5601 				   struct mlxsw_sp_fib_entry *fib_entry)
5602 {
5603 	u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr;
5604 	int dst_len = fib_entry->fib_node->key.prefix_len;
5605 	struct mlxsw_sp_fib4_entry *fib4_entry;
5606 	struct fib_rt_info fri;
5607 
5608 	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
5609 				  common);
5610 	fri.fi = fib4_entry->fi;
5611 	fri.tb_id = fib4_entry->tb_id;
5612 	fri.dst = cpu_to_be32(*p_dst);
5613 	fri.dst_len = dst_len;
5614 	fri.dscp = fib4_entry->dscp;
5615 	fri.type = fib4_entry->type;
5616 	fri.offload = false;
5617 	fri.trap = false;
5618 	fri.offload_failed = false;
5619 	fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
5620 }
5621 
5622 #if IS_ENABLED(CONFIG_IPV6)
5623 static void
5624 mlxsw_sp_fib6_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp,
5625 				      struct fib6_info **rt_arr,
5626 				      unsigned int nrt6)
5627 {
5628 	int i;
5629 
5630 	/* In IPv6 a multipath route is represented using multiple routes, so
5631 	 * we need to set the flags on all of them.
5632 	 */
5633 	for (i = 0; i < nrt6; i++)
5634 		fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), rt_arr[i],
5635 				       false, false, true);
5636 }
5637 #else
5638 static void
5639 mlxsw_sp_fib6_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp,
5640 				      struct fib6_info **rt_arr,
5641 				      unsigned int nrt6)
5642 {
5643 }
5644 #endif
5645 
5646 #if IS_ENABLED(CONFIG_IPV6)
5647 static void
5648 mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
5649 				 struct mlxsw_sp_fib_entry *fib_entry)
5650 {
5651 	struct mlxsw_sp_fib6_entry *fib6_entry;
5652 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5653 	bool should_offload;
5654 
5655 	should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry);
5656 
5657 	/* In IPv6 a multipath route is represented using multiple routes, so
5658 	 * we need to set the flags on all of them.
5659 	 */
5660 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
5661 				  common);
5662 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
5663 		fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), mlxsw_sp_rt6->rt,
5664 				       should_offload, !should_offload, false);
5665 }
5666 #else
5667 static void
5668 mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
5669 				 struct mlxsw_sp_fib_entry *fib_entry)
5670 {
5671 }
5672 #endif
5673 
5674 #if IS_ENABLED(CONFIG_IPV6)
5675 static void
5676 mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
5677 				   struct mlxsw_sp_fib_entry *fib_entry)
5678 {
5679 	struct mlxsw_sp_fib6_entry *fib6_entry;
5680 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5681 
5682 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
5683 				  common);
5684 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
5685 		fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), mlxsw_sp_rt6->rt,
5686 				       false, false, false);
5687 }
5688 #else
5689 static void
5690 mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
5691 				   struct mlxsw_sp_fib_entry *fib_entry)
5692 {
5693 }
5694 #endif
5695 
5696 static void
5697 mlxsw_sp_fib_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
5698 				struct mlxsw_sp_fib_entry *fib_entry)
5699 {
5700 	switch (fib_entry->fib_node->fib->proto) {
5701 	case MLXSW_SP_L3_PROTO_IPV4:
5702 		mlxsw_sp_fib4_entry_hw_flags_set(mlxsw_sp, fib_entry);
5703 		break;
5704 	case MLXSW_SP_L3_PROTO_IPV6:
5705 		mlxsw_sp_fib6_entry_hw_flags_set(mlxsw_sp, fib_entry);
5706 		break;
5707 	}
5708 }
5709 
5710 static void
5711 mlxsw_sp_fib_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
5712 				  struct mlxsw_sp_fib_entry *fib_entry)
5713 {
5714 	switch (fib_entry->fib_node->fib->proto) {
5715 	case MLXSW_SP_L3_PROTO_IPV4:
5716 		mlxsw_sp_fib4_entry_hw_flags_clear(mlxsw_sp, fib_entry);
5717 		break;
5718 	case MLXSW_SP_L3_PROTO_IPV6:
5719 		mlxsw_sp_fib6_entry_hw_flags_clear(mlxsw_sp, fib_entry);
5720 		break;
5721 	}
5722 }
5723 
5724 static void
5725 mlxsw_sp_fib_entry_hw_flags_refresh(struct mlxsw_sp *mlxsw_sp,
5726 				    struct mlxsw_sp_fib_entry *fib_entry,
5727 				    enum mlxsw_reg_ralue_op op)
5728 {
5729 	switch (op) {
5730 	case MLXSW_REG_RALUE_OP_WRITE_WRITE:
5731 		mlxsw_sp_fib_entry_hw_flags_set(mlxsw_sp, fib_entry);
5732 		break;
5733 	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
5734 		mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, fib_entry);
5735 		break;
5736 	default:
5737 		break;
5738 	}
5739 }
5740 
5741 static void
5742 mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
5743 			      const struct mlxsw_sp_fib_entry *fib_entry,
5744 			      enum mlxsw_reg_ralue_op op)
5745 {
5746 	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
5747 	enum mlxsw_reg_ralxx_protocol proto;
5748 	u32 *p_dip;
5749 
5750 	proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
5751 
5752 	switch (fib->proto) {
5753 	case MLXSW_SP_L3_PROTO_IPV4:
5754 		p_dip = (u32 *) fib_entry->fib_node->key.addr;
5755 		mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
5756 				      fib_entry->fib_node->key.prefix_len,
5757 				      *p_dip);
5758 		break;
5759 	case MLXSW_SP_L3_PROTO_IPV6:
5760 		mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
5761 				      fib_entry->fib_node->key.prefix_len,
5762 				      fib_entry->fib_node->key.addr);
5763 		break;
5764 	}
5765 }
5766 
5767 static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
5768 					struct mlxsw_sp_fib_entry *fib_entry,
5769 					enum mlxsw_reg_ralue_op op)
5770 {
5771 	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
5772 	struct mlxsw_sp_nexthop_group_info *nhgi = nh_group->nhgi;
5773 	char ralue_pl[MLXSW_REG_RALUE_LEN];
5774 	enum mlxsw_reg_ralue_trap_action trap_action;
5775 	u16 trap_id = 0;
5776 	u32 adjacency_index = 0;
5777 	u16 ecmp_size = 0;
5778 
	/* In case the nexthop group adjacency index is valid, use it
	 * with the provided ECMP size. Otherwise, set up a trap and pass
	 * traffic to the kernel.
	 */
5783 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
5784 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
5785 		adjacency_index = nhgi->adj_index;
5786 		ecmp_size = nhgi->ecmp_size;
5787 	} else if (!nhgi->adj_index_valid && nhgi->count &&
5788 		   mlxsw_sp_nhgi_rif(nhgi)) {
5789 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
5790 		adjacency_index = mlxsw_sp->router->adj_trap_index;
5791 		ecmp_size = 1;
5792 	} else {
5793 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
5794 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
5795 	}
5796 
5797 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
5798 	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
5799 					adjacency_index, ecmp_size);
5800 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
5801 }
5802 
5803 static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
5804 				       struct mlxsw_sp_fib_entry *fib_entry,
5805 				       enum mlxsw_reg_ralue_op op)
5806 {
5807 	struct mlxsw_sp_rif *rif = mlxsw_sp_nhgi_rif(fib_entry->nh_group->nhgi);
5808 	enum mlxsw_reg_ralue_trap_action trap_action;
5809 	char ralue_pl[MLXSW_REG_RALUE_LEN];
5810 	u16 trap_id = 0;
5811 	u16 rif_index = 0;
5812 
5813 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
5814 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
5815 		rif_index = rif->rif_index;
5816 	} else {
5817 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
5818 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
5819 	}
5820 
5821 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
5822 	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
5823 				       rif_index);
5824 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
5825 }
5826 
5827 static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
5828 				      struct mlxsw_sp_fib_entry *fib_entry,
5829 				      enum mlxsw_reg_ralue_op op)
5830 {
5831 	char ralue_pl[MLXSW_REG_RALUE_LEN];
5832 
5833 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
5834 	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
5835 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
5836 }
5837 
5838 static int mlxsw_sp_fib_entry_op_blackhole(struct mlxsw_sp *mlxsw_sp,
5839 					   struct mlxsw_sp_fib_entry *fib_entry,
5840 					   enum mlxsw_reg_ralue_op op)
5841 {
5842 	enum mlxsw_reg_ralue_trap_action trap_action;
5843 	char ralue_pl[MLXSW_REG_RALUE_LEN];
5844 
5845 	trap_action = MLXSW_REG_RALUE_TRAP_ACTION_DISCARD_ERROR;
5846 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
5847 	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, 0, 0);
5848 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
5849 }
5850 
5851 static int
5852 mlxsw_sp_fib_entry_op_unreachable(struct mlxsw_sp *mlxsw_sp,
5853 				  struct mlxsw_sp_fib_entry *fib_entry,
5854 				  enum mlxsw_reg_ralue_op op)
5855 {
5856 	enum mlxsw_reg_ralue_trap_action trap_action;
5857 	char ralue_pl[MLXSW_REG_RALUE_LEN];
5858 	u16 trap_id;
5859 
5860 	trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
5861 	trap_id = MLXSW_TRAP_ID_RTR_INGRESS1;
5862 
5863 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
5864 	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, 0);
5865 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
5866 }
5867 
5868 static int
5869 mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
5870 				 struct mlxsw_sp_fib_entry *fib_entry,
5871 				 enum mlxsw_reg_ralue_op op)
5872 {
5873 	struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
5874 	const struct mlxsw_sp_ipip_ops *ipip_ops;
5875 	char ralue_pl[MLXSW_REG_RALUE_LEN];
5876 	int err;
5877 
5878 	if (WARN_ON(!ipip_entry))
5879 		return -EINVAL;
5880 
5881 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
5882 	err = ipip_ops->decap_config(mlxsw_sp, ipip_entry,
5883 				     fib_entry->decap.tunnel_index);
5884 	if (err)
5885 		return err;
5886 
5887 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
5888 	mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl,
5889 					   fib_entry->decap.tunnel_index);
5890 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
5891 }
5892 
5893 static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp,
5894 					   struct mlxsw_sp_fib_entry *fib_entry,
5895 					   enum mlxsw_reg_ralue_op op)
5896 {
5897 	char ralue_pl[MLXSW_REG_RALUE_LEN];
5898 
5899 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
5900 	mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl,
5901 					   fib_entry->decap.tunnel_index);
5902 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
5903 }
5904 
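/* Pack and write a RALUE register according to the entry type. Each
 * helper above encodes the common route key and a type-specific action.
 */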
5905 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
5906 				   struct mlxsw_sp_fib_entry *fib_entry,
5907 				   enum mlxsw_reg_ralue_op op)
5908 {
5909 	switch (fib_entry->type) {
5910 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
5911 		return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
5912 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
5913 		return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
5914 	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
5915 		return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
5916 	case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
5917 		return mlxsw_sp_fib_entry_op_blackhole(mlxsw_sp, fib_entry, op);
5918 	case MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE:
5919 		return mlxsw_sp_fib_entry_op_unreachable(mlxsw_sp, fib_entry,
5920 							 op);
5921 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
5922 		return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
5923 							fib_entry, op);
5924 	case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
5925 		return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, fib_entry, op);
5926 	}
5927 	return -EINVAL;
5928 }
5929 
5930 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
5931 				 struct mlxsw_sp_fib_entry *fib_entry,
5932 				 enum mlxsw_reg_ralue_op op)
5933 {
5934 	int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
5935 
5936 	if (err)
5937 		return err;
5938 
5939 	mlxsw_sp_fib_entry_hw_flags_refresh(mlxsw_sp, fib_entry, op);
5940 
	return 0;
5942 }
5943 
5944 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
5945 				     struct mlxsw_sp_fib_entry *fib_entry)
5946 {
5947 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
5948 				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
5949 }
5950 
5951 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
5952 				  struct mlxsw_sp_fib_entry *fib_entry)
5953 {
5954 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
5955 				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
5956 }
5957 
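/* Derive the FIB entry type from the kernel route type. RTN_LOCAL
 * routes may actually be IP-in-IP or NVE decapsulation entries if a
 * matching decap configuration exists; otherwise they trap to the CPU.
 */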
5958 static int
5959 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
5960 			     const struct fib_entry_notifier_info *fen_info,
5961 			     struct mlxsw_sp_fib_entry *fib_entry)
5962 {
5963 	struct mlxsw_sp_nexthop_group_info *nhgi = fib_entry->nh_group->nhgi;
5964 	union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
5965 	struct mlxsw_sp_router *router = mlxsw_sp->router;
5966 	u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id);
5967 	int ifindex = nhgi->nexthops[0].ifindex;
5968 	struct mlxsw_sp_ipip_entry *ipip_entry;
5969 
5970 	switch (fen_info->type) {
5971 	case RTN_LOCAL:
5972 		ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, ifindex,
5973 							       MLXSW_SP_L3_PROTO_IPV4, dip);
5974 		if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
5975 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
5976 			return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
5977 							     fib_entry,
5978 							     ipip_entry);
5979 		}
5980 		if (mlxsw_sp_router_nve_is_decap(mlxsw_sp, tb_id,
5981 						 MLXSW_SP_L3_PROTO_IPV4,
5982 						 &dip)) {
5983 			u32 tunnel_index;
5984 
5985 			tunnel_index = router->nve_decap_config.tunnel_index;
5986 			fib_entry->decap.tunnel_index = tunnel_index;
5987 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
5988 			return 0;
5989 		}
5990 		fallthrough;
5991 	case RTN_BROADCAST:
5992 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
5993 		return 0;
5994 	case RTN_BLACKHOLE:
5995 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
5996 		return 0;
5997 	case RTN_UNREACHABLE:
5998 	case RTN_PROHIBIT:
		/* Packets hitting these routes need to be trapped, but
		 * can be trapped with a lower priority than packets
		 * directed at the host, so use action type local instead
		 * of trap.
		 */
6003 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE;
6004 		return 0;
6005 	case RTN_UNICAST:
6006 		if (nhgi->gateway)
6007 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
6008 		else
6009 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
6010 		return 0;
6011 	default:
6012 		return -EINVAL;
6013 	}
6014 }
6015 
6016 static void
6017 mlxsw_sp_fib_entry_type_unset(struct mlxsw_sp *mlxsw_sp,
6018 			      struct mlxsw_sp_fib_entry *fib_entry)
6019 {
6020 	switch (fib_entry->type) {
6021 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
6022 		mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
6023 		break;
6024 	default:
6025 		break;
6026 	}
6027 }
6028 
6029 static void
6030 mlxsw_sp_fib4_entry_type_unset(struct mlxsw_sp *mlxsw_sp,
6031 			       struct mlxsw_sp_fib4_entry *fib4_entry)
6032 {
6033 	mlxsw_sp_fib_entry_type_unset(mlxsw_sp, &fib4_entry->common);
6034 }
6035 
6036 static struct mlxsw_sp_fib4_entry *
6037 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
6038 			   struct mlxsw_sp_fib_node *fib_node,
6039 			   const struct fib_entry_notifier_info *fen_info)
6040 {
6041 	struct mlxsw_sp_fib4_entry *fib4_entry;
6042 	struct mlxsw_sp_fib_entry *fib_entry;
6043 	int err;
6044 
6045 	fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
6046 	if (!fib4_entry)
6047 		return ERR_PTR(-ENOMEM);
6048 	fib_entry = &fib4_entry->common;
6049 
6050 	err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
6051 	if (err)
6052 		goto err_nexthop4_group_get;
6053 
6054 	err = mlxsw_sp_nexthop_group_vr_link(fib_entry->nh_group,
6055 					     fib_node->fib);
6056 	if (err)
6057 		goto err_nexthop_group_vr_link;
6058 
6059 	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
6060 	if (err)
6061 		goto err_fib4_entry_type_set;
6062 
6063 	fib4_entry->fi = fen_info->fi;
6064 	fib_info_hold(fib4_entry->fi);
6065 	fib4_entry->tb_id = fen_info->tb_id;
6066 	fib4_entry->type = fen_info->type;
6067 	fib4_entry->dscp = fen_info->dscp;
6068 
6069 	fib_entry->fib_node = fib_node;
6070 
6071 	return fib4_entry;
6072 
6073 err_fib4_entry_type_set:
6074 	mlxsw_sp_nexthop_group_vr_unlink(fib_entry->nh_group, fib_node->fib);
6075 err_nexthop_group_vr_link:
6076 	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
6077 err_nexthop4_group_get:
6078 	kfree(fib4_entry);
6079 	return ERR_PTR(err);
6080 }
6081 
6082 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
6083 					struct mlxsw_sp_fib4_entry *fib4_entry)
6084 {
6085 	struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
6086 
6087 	fib_info_put(fib4_entry->fi);
6088 	mlxsw_sp_fib4_entry_type_unset(mlxsw_sp, fib4_entry);
6089 	mlxsw_sp_nexthop_group_vr_unlink(fib4_entry->common.nh_group,
6090 					 fib_node->fib);
6091 	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
6092 	kfree(fib4_entry);
6093 }
6094 
6095 static struct mlxsw_sp_fib4_entry *
6096 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
6097 			   const struct fib_entry_notifier_info *fen_info)
6098 {
6099 	struct mlxsw_sp_fib4_entry *fib4_entry;
6100 	struct mlxsw_sp_fib_node *fib_node;
6101 	struct mlxsw_sp_fib *fib;
6102 	struct mlxsw_sp_vr *vr;
6103 
6104 	vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
6105 	if (!vr)
6106 		return NULL;
6107 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
6108 
6109 	fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
6110 					    sizeof(fen_info->dst),
6111 					    fen_info->dst_len);
6112 	if (!fib_node)
6113 		return NULL;
6114 
6115 	fib4_entry = container_of(fib_node->fib_entry,
6116 				  struct mlxsw_sp_fib4_entry, common);
6117 	if (fib4_entry->tb_id == fen_info->tb_id &&
6118 	    fib4_entry->dscp == fen_info->dscp &&
6119 	    fib4_entry->type == fen_info->type &&
6120 	    fib4_entry->fi == fen_info->fi)
6121 		return fib4_entry;
6122 
6123 	return NULL;
6124 }
6125 
6126 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
6127 	.key_offset = offsetof(struct mlxsw_sp_fib_node, key),
6128 	.head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
6129 	.key_len = sizeof(struct mlxsw_sp_fib_key),
6130 	.automatic_shrinking = true,
6131 };
6132 
6133 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
6134 				    struct mlxsw_sp_fib_node *fib_node)
6135 {
6136 	return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
6137 				      mlxsw_sp_fib_ht_params);
6138 }
6139 
6140 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
6141 				     struct mlxsw_sp_fib_node *fib_node)
6142 {
6143 	rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
6144 			       mlxsw_sp_fib_ht_params);
6145 }
6146 
6147 static struct mlxsw_sp_fib_node *
6148 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
6149 			 size_t addr_len, unsigned char prefix_len)
6150 {
6151 	struct mlxsw_sp_fib_key key;
6152 
6153 	memset(&key, 0, sizeof(key));
6154 	memcpy(key.addr, addr, addr_len);
6155 	key.prefix_len = prefix_len;
6156 	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
6157 }
6158 
6159 static struct mlxsw_sp_fib_node *
6160 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
6161 			 size_t addr_len, unsigned char prefix_len)
6162 {
6163 	struct mlxsw_sp_fib_node *fib_node;
6164 
6165 	fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
6166 	if (!fib_node)
6167 		return NULL;
6168 
6169 	list_add(&fib_node->list, &fib->node_list);
6170 	memcpy(fib_node->key.addr, addr, addr_len);
6171 	fib_node->key.prefix_len = prefix_len;
6172 
6173 	return fib_node;
6174 }
6175 
6176 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
6177 {
6178 	list_del(&fib_node->list);
6179 	kfree(fib_node);
6180 }
6181 
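/* Link the FIB node's prefix length into the LPM tree used for the
 * node's protocol. If this prefix length is not yet described by the
 * tree, get a tree for the updated prefix usage and migrate the virtual
 * routers bound to the FIB to it.
 */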
6182 static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
6183 				      struct mlxsw_sp_fib_node *fib_node)
6184 {
6185 	struct mlxsw_sp_prefix_usage req_prefix_usage;
6186 	struct mlxsw_sp_fib *fib = fib_node->fib;
6187 	struct mlxsw_sp_lpm_tree *lpm_tree;
6188 	int err;
6189 
6190 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
6191 	if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
6192 		goto out;
6193 
6194 	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
6195 	mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
6196 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
6197 					 fib->proto);
6198 	if (IS_ERR(lpm_tree))
6199 		return PTR_ERR(lpm_tree);
6200 
6201 	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
6202 	if (err)
6203 		goto err_lpm_tree_replace;
6204 
6205 out:
6206 	lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
6207 	return 0;
6208 
6209 err_lpm_tree_replace:
6210 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
6211 	return err;
6212 }
6213 
6214 static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
6215 					 struct mlxsw_sp_fib_node *fib_node)
6216 {
6217 	struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
6218 	struct mlxsw_sp_prefix_usage req_prefix_usage;
6219 	struct mlxsw_sp_fib *fib = fib_node->fib;
6220 	int err;
6221 
6222 	if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
6223 		return;
	/* Try to construct a new LPM tree from the current prefix usage
	 * minus the unused prefix length. If we fail, continue using the
	 * old tree.
	 */
6227 	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
6228 	mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
6229 				    fib_node->key.prefix_len);
6230 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
6231 					 fib->proto);
6232 	if (IS_ERR(lpm_tree))
6233 		return;
6234 
6235 	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
6236 	if (err)
6237 		goto err_lpm_tree_replace;
6238 
6239 	return;
6240 
6241 err_lpm_tree_replace:
6242 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
6243 }
6244 
6245 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
6246 				  struct mlxsw_sp_fib_node *fib_node,
6247 				  struct mlxsw_sp_fib *fib)
6248 {
6249 	int err;
6250 
6251 	err = mlxsw_sp_fib_node_insert(fib, fib_node);
6252 	if (err)
6253 		return err;
6254 	fib_node->fib = fib;
6255 
6256 	err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
6257 	if (err)
6258 		goto err_fib_lpm_tree_link;
6259 
6260 	return 0;
6261 
6262 err_fib_lpm_tree_link:
6263 	fib_node->fib = NULL;
6264 	mlxsw_sp_fib_node_remove(fib, fib_node);
6265 	return err;
6266 }
6267 
6268 static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
6269 				   struct mlxsw_sp_fib_node *fib_node)
6270 {
6271 	struct mlxsw_sp_fib *fib = fib_node->fib;
6272 
6273 	mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
6274 	fib_node->fib = NULL;
6275 	mlxsw_sp_fib_node_remove(fib, fib_node);
6276 }
6277 
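/* Get a FIB node for the given prefix, creating it if it does not exist
 * yet. The matching mlxsw_sp_fib_node_put() releases the node once it
 * no longer holds a FIB entry.
 */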
6278 static struct mlxsw_sp_fib_node *
6279 mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
6280 		      size_t addr_len, unsigned char prefix_len,
6281 		      enum mlxsw_sp_l3proto proto)
6282 {
6283 	struct mlxsw_sp_fib_node *fib_node;
6284 	struct mlxsw_sp_fib *fib;
6285 	struct mlxsw_sp_vr *vr;
6286 	int err;
6287 
6288 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
6289 	if (IS_ERR(vr))
6290 		return ERR_CAST(vr);
6291 	fib = mlxsw_sp_vr_fib(vr, proto);
6292 
6293 	fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
6294 	if (fib_node)
6295 		return fib_node;
6296 
6297 	fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
6298 	if (!fib_node) {
6299 		err = -ENOMEM;
6300 		goto err_fib_node_create;
6301 	}
6302 
6303 	err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
6304 	if (err)
6305 		goto err_fib_node_init;
6306 
6307 	return fib_node;
6308 
6309 err_fib_node_init:
6310 	mlxsw_sp_fib_node_destroy(fib_node);
6311 err_fib_node_create:
6312 	mlxsw_sp_vr_put(mlxsw_sp, vr);
6313 	return ERR_PTR(err);
6314 }
6315 
6316 static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
6317 				  struct mlxsw_sp_fib_node *fib_node)
6318 {
6319 	struct mlxsw_sp_vr *vr = fib_node->fib->vr;
6320 
6321 	if (fib_node->fib_entry)
6322 		return;
6323 	mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
6324 	mlxsw_sp_fib_node_destroy(fib_node);
6325 	mlxsw_sp_vr_put(mlxsw_sp, vr);
6326 }
6327 
6328 static int mlxsw_sp_fib_node_entry_link(struct mlxsw_sp *mlxsw_sp,
6329 					struct mlxsw_sp_fib_entry *fib_entry)
6330 {
6331 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
6332 	int err;
6333 
6334 	fib_node->fib_entry = fib_entry;
6335 
6336 	err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
6337 	if (err)
6338 		goto err_fib_entry_update;
6339 
6340 	return 0;
6341 
6342 err_fib_entry_update:
6343 	fib_node->fib_entry = NULL;
6344 	return err;
6345 }
6346 
6347 static void
6348 mlxsw_sp_fib_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
6349 			       struct mlxsw_sp_fib_entry *fib_entry)
6350 {
6351 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
6352 
6353 	mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
6354 	fib_node->fib_entry = NULL;
6355 }
6356 
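/* The kernel consults the local table before the main table. Do not
 * allow a main table route to replace an offloaded local table route of
 * the same prefix, as that would make hardware forwarding diverge from
 * the kernel.
 */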
6357 static bool mlxsw_sp_fib4_allow_replace(struct mlxsw_sp_fib4_entry *fib4_entry)
6358 {
6359 	struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
6360 	struct mlxsw_sp_fib4_entry *fib4_replaced;
6361 
6362 	if (!fib_node->fib_entry)
6363 		return true;
6364 
6365 	fib4_replaced = container_of(fib_node->fib_entry,
6366 				     struct mlxsw_sp_fib4_entry, common);
6367 	if (fib4_entry->tb_id == RT_TABLE_MAIN &&
6368 	    fib4_replaced->tb_id == RT_TABLE_LOCAL)
6369 		return false;
6370 
6371 	return true;
6372 }
6373 
6374 static int
6375 mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp,
6376 			     const struct fib_entry_notifier_info *fen_info)
6377 {
6378 	struct mlxsw_sp_fib4_entry *fib4_entry, *fib4_replaced;
6379 	struct mlxsw_sp_fib_entry *replaced;
6380 	struct mlxsw_sp_fib_node *fib_node;
6381 	int err;
6382 
6383 	if (fen_info->fi->nh &&
6384 	    !mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, fen_info->fi->nh->id))
6385 		return 0;
6386 
6387 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
6388 					 &fen_info->dst, sizeof(fen_info->dst),
6389 					 fen_info->dst_len,
6390 					 MLXSW_SP_L3_PROTO_IPV4);
6391 	if (IS_ERR(fib_node)) {
6392 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
6393 		return PTR_ERR(fib_node);
6394 	}
6395 
6396 	fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
6397 	if (IS_ERR(fib4_entry)) {
6398 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
6399 		err = PTR_ERR(fib4_entry);
6400 		goto err_fib4_entry_create;
6401 	}
6402 
6403 	if (!mlxsw_sp_fib4_allow_replace(fib4_entry)) {
6404 		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
6405 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
6406 		return 0;
6407 	}
6408 
6409 	replaced = fib_node->fib_entry;
6410 	err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib4_entry->common);
6411 	if (err) {
6412 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
6413 		goto err_fib_node_entry_link;
6414 	}
6415 
6416 	/* Nothing to replace */
6417 	if (!replaced)
6418 		return 0;
6419 
6420 	mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced);
6421 	fib4_replaced = container_of(replaced, struct mlxsw_sp_fib4_entry,
6422 				     common);
6423 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_replaced);
6424 
6425 	return 0;
6426 
6427 err_fib_node_entry_link:
6428 	fib_node->fib_entry = replaced;
6429 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
6430 err_fib4_entry_create:
6431 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
6432 	return err;
6433 }
6434 
6435 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
6436 				     struct fib_entry_notifier_info *fen_info)
6437 {
6438 	struct mlxsw_sp_fib4_entry *fib4_entry;
6439 	struct mlxsw_sp_fib_node *fib_node;
6440 
6441 	fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
6442 	if (!fib4_entry)
6443 		return;
6444 	fib_node = fib4_entry->common.fib_node;
6445 
6446 	mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib4_entry->common);
6447 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
6448 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
6449 }
6450 
6451 static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
6452 {
6453 	/* Multicast routes aren't supported, so ignore them. Neighbour
6454 	 * Discovery packets are specifically trapped.
6455 	 */
6456 	if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_MULTICAST)
6457 		return true;
6458 
6459 	/* Cloned routes are irrelevant in the forwarding path. */
6460 	if (rt->fib6_flags & RTF_CACHE)
6461 		return true;
6462 
6463 	return false;
6464 }
6465 
6466 static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt)
6467 {
6468 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6469 
6470 	mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
6471 	if (!mlxsw_sp_rt6)
6472 		return ERR_PTR(-ENOMEM);
6473 
	/* In case of route replace, the replaced route is deleted without
	 * notification. Take a reference to prevent accessing freed
	 * memory.
	 */
6478 	mlxsw_sp_rt6->rt = rt;
6479 	fib6_info_hold(rt);
6480 
6481 	return mlxsw_sp_rt6;
6482 }
6483 
6484 #if IS_ENABLED(CONFIG_IPV6)
6485 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
6486 {
6487 	fib6_info_release(rt);
6488 }
6489 #else
6490 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
6491 {
6492 }
6493 #endif
6494 
6495 static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
6496 {
6497 	struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
6498 
6499 	if (!mlxsw_sp_rt6->rt->nh)
6500 		fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
6501 	mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
6502 	kfree(mlxsw_sp_rt6);
6503 }
6504 
6505 static struct fib6_info *
6506 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
6507 {
6508 	return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
6509 				list)->rt;
6510 }
6511 
6512 static struct mlxsw_sp_rt6 *
6513 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
6514 			    const struct fib6_info *rt)
6515 {
6516 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6517 
6518 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
6519 		if (mlxsw_sp_rt6->rt == rt)
6520 			return mlxsw_sp_rt6;
6521 	}
6522 
6523 	return NULL;
6524 }
6525 
6526 static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
6527 					const struct fib6_info *rt,
6528 					enum mlxsw_sp_ipip_type *ret)
6529 {
6530 	return rt->fib6_nh->fib_nh_dev &&
6531 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh->fib_nh_dev, ret);
6532 }
6533 
6534 static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
6535 				  struct mlxsw_sp_nexthop_group *nh_grp,
6536 				  struct mlxsw_sp_nexthop *nh,
6537 				  const struct fib6_info *rt)
6538 {
6539 	struct net_device *dev = rt->fib6_nh->fib_nh_dev;
6540 	int err;
6541 
6542 	nh->nhgi = nh_grp->nhgi;
6543 	nh->nh_weight = rt->fib6_nh->fib_nh_weight;
6544 	memcpy(&nh->gw_addr, &rt->fib6_nh->fib_nh_gw6, sizeof(nh->gw_addr));
6545 #if IS_ENABLED(CONFIG_IPV6)
6546 	nh->neigh_tbl = &nd_tbl;
6547 #endif
6548 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
6549 
6550 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
6551 
6552 	if (!dev)
6553 		return 0;
6554 	nh->ifindex = dev->ifindex;
6555 
6556 	err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
6557 	if (err)
6558 		goto err_nexthop_type_init;
6559 
6560 	return 0;
6561 
6562 err_nexthop_type_init:
6563 	list_del(&nh->router_list_node);
6564 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
6565 	return err;
6566 }
6567 
6568 static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
6569 				   struct mlxsw_sp_nexthop *nh)
6570 {
6571 	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
6572 	list_del(&nh->router_list_node);
6573 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
6574 }
6575 
6576 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
6577 				    const struct fib6_info *rt)
6578 {
6579 	return rt->fib6_nh->fib_nh_gw_family ||
6580 	       mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
6581 }
6582 
6583 static int
6584 mlxsw_sp_nexthop6_group_info_init(struct mlxsw_sp *mlxsw_sp,
6585 				  struct mlxsw_sp_nexthop_group *nh_grp,
6586 				  struct mlxsw_sp_fib6_entry *fib6_entry)
6587 {
6588 	struct mlxsw_sp_nexthop_group_info *nhgi;
6589 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6590 	struct mlxsw_sp_nexthop *nh;
6591 	int err, i;
6592 
6593 	nhgi = kzalloc(struct_size(nhgi, nexthops, fib6_entry->nrt6),
6594 		       GFP_KERNEL);
6595 	if (!nhgi)
6596 		return -ENOMEM;
6597 	nh_grp->nhgi = nhgi;
6598 	nhgi->nh_grp = nh_grp;
6599 	mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
6600 					struct mlxsw_sp_rt6, list);
6601 	nhgi->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
6602 	nhgi->count = fib6_entry->nrt6;
6603 	for (i = 0; i < nhgi->count; i++) {
6604 		struct fib6_info *rt = mlxsw_sp_rt6->rt;
6605 
6606 		nh = &nhgi->nexthops[i];
6607 		err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
6608 		if (err)
6609 			goto err_nexthop6_init;
6610 		mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
6611 	}
6613 	err = mlxsw_sp_nexthop_group_inc(mlxsw_sp);
6614 	if (err)
6615 		goto err_group_inc;
6616 	err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
6617 	if (err)
6618 		goto err_group_refresh;
6619 
6620 	return 0;
6621 
6622 err_group_refresh:
6623 	mlxsw_sp_nexthop_group_dec(mlxsw_sp);
6624 err_group_inc:
6625 	i = nhgi->count;
6626 err_nexthop6_init:
6627 	for (i--; i >= 0; i--) {
6628 		nh = &nhgi->nexthops[i];
6629 		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
6630 	}
6631 	kfree(nhgi);
6632 	return err;
6633 }
6634 
6635 static void
6636 mlxsw_sp_nexthop6_group_info_fini(struct mlxsw_sp *mlxsw_sp,
6637 				  struct mlxsw_sp_nexthop_group *nh_grp)
6638 {
6639 	struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
6640 	int i;
6641 
6642 	mlxsw_sp_nexthop_group_dec(mlxsw_sp);
6643 	for (i = nhgi->count - 1; i >= 0; i--) {
6644 		struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
6645 
6646 		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
6647 	}
6648 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
6649 	WARN_ON_ONCE(nhgi->adj_index_valid);
6650 	kfree(nhgi);
6651 }
6652 
6653 static struct mlxsw_sp_nexthop_group *
6654 mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
6655 			       struct mlxsw_sp_fib6_entry *fib6_entry)
6656 {
6657 	struct mlxsw_sp_nexthop_group *nh_grp;
6658 	int err;
6659 
6660 	nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL);
6661 	if (!nh_grp)
6662 		return ERR_PTR(-ENOMEM);
6663 	INIT_LIST_HEAD(&nh_grp->vr_list);
6664 	err = rhashtable_init(&nh_grp->vr_ht,
6665 			      &mlxsw_sp_nexthop_group_vr_ht_params);
6666 	if (err)
6667 		goto err_nexthop_group_vr_ht_init;
6668 	INIT_LIST_HEAD(&nh_grp->fib_list);
6669 	nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6;
6670 
6671 	err = mlxsw_sp_nexthop6_group_info_init(mlxsw_sp, nh_grp, fib6_entry);
6672 	if (err)
6673 		goto err_nexthop_group_info_init;
6674 
6675 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
6676 	if (err)
6677 		goto err_nexthop_group_insert;
6678 
6679 	nh_grp->can_destroy = true;
6680 
6681 	return nh_grp;
6682 
6683 err_nexthop_group_insert:
6684 	mlxsw_sp_nexthop6_group_info_fini(mlxsw_sp, nh_grp);
6685 err_nexthop_group_info_init:
6686 	rhashtable_destroy(&nh_grp->vr_ht);
6687 err_nexthop_group_vr_ht_init:
6688 	kfree(nh_grp);
6689 	return ERR_PTR(err);
6690 }
6691 
6692 static void
6693 mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
6694 				struct mlxsw_sp_nexthop_group *nh_grp)
6695 {
6696 	if (!nh_grp->can_destroy)
6697 		return;
6698 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
6699 	mlxsw_sp_nexthop6_group_info_fini(mlxsw_sp, nh_grp);
6700 	WARN_ON_ONCE(!list_empty(&nh_grp->vr_list));
6701 	rhashtable_destroy(&nh_grp->vr_ht);
6702 	kfree(nh_grp);
6703 }
6704 
6705 static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
6706 				       struct mlxsw_sp_fib6_entry *fib6_entry)
6707 {
6708 	struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
6709 	struct mlxsw_sp_nexthop_group *nh_grp;
6710 
6711 	if (rt->nh) {
6712 		nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp,
6713 							   rt->nh->id);
6714 		if (WARN_ON_ONCE(!nh_grp))
6715 			return -EINVAL;
6716 		goto out;
6717 	}
6718 
6719 	nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
6720 	if (!nh_grp) {
6721 		nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
6722 		if (IS_ERR(nh_grp))
6723 			return PTR_ERR(nh_grp);
6724 	}
6725 
6726 	/* The route and the nexthop are described by the same struct, so we
	 * need to update the nexthop offload indication for the new route.
6728 	 */
6729 	__mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry);
6730 
6731 out:
6732 	list_add_tail(&fib6_entry->common.nexthop_group_node,
6733 		      &nh_grp->fib_list);
6734 	fib6_entry->common.nh_group = nh_grp;
6735 
6736 	return 0;
6737 }
6738 
6739 static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
6740 					struct mlxsw_sp_fib_entry *fib_entry)
6741 {
6742 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
6743 
6744 	list_del(&fib_entry->nexthop_group_node);
6745 	if (!list_empty(&nh_grp->fib_list))
6746 		return;
6747 
6748 	if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ) {
6749 		mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
6750 		return;
6751 	}
6752 
6753 	mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
6754 }
6755 
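/* Rebind the FIB entry to a nexthop group matching its current set of
 * rt6 entries and update the adjacency index used by the device. On
 * failure, roll back to the old group.
 */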
6756 static int
6757 mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
6758 			       struct mlxsw_sp_fib6_entry *fib6_entry)
6759 {
6760 	struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
6761 	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
6762 	int err;
6763 
6764 	mlxsw_sp_nexthop_group_vr_unlink(old_nh_grp, fib_node->fib);
6765 	fib6_entry->common.nh_group = NULL;
6766 	list_del(&fib6_entry->common.nexthop_group_node);
6767 
6768 	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
6769 	if (err)
6770 		goto err_nexthop6_group_get;
6771 
6772 	err = mlxsw_sp_nexthop_group_vr_link(fib6_entry->common.nh_group,
6773 					     fib_node->fib);
6774 	if (err)
6775 		goto err_nexthop_group_vr_link;
6776 
	/* If this entry is offloaded, then the adjacency index currently
	 * associated with it in the device's table is that of the old
	 * group. Start using the new one instead.
	 */
6781 	err = mlxsw_sp_fib_entry_update(mlxsw_sp, &fib6_entry->common);
6782 	if (err)
6783 		goto err_fib_entry_update;
6784 
6785 	if (list_empty(&old_nh_grp->fib_list))
6786 		mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
6787 
6788 	return 0;
6789 
6790 err_fib_entry_update:
6791 	mlxsw_sp_nexthop_group_vr_unlink(fib6_entry->common.nh_group,
6792 					 fib_node->fib);
6793 err_nexthop_group_vr_link:
6794 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
6795 err_nexthop6_group_get:
6796 	list_add_tail(&fib6_entry->common.nexthop_group_node,
6797 		      &old_nh_grp->fib_list);
6798 	fib6_entry->common.nh_group = old_nh_grp;
6799 	mlxsw_sp_nexthop_group_vr_link(old_nh_grp, fib_node->fib);
6800 	return err;
6801 }
6802 
6803 static int
6804 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
6805 				struct mlxsw_sp_fib6_entry *fib6_entry,
6806 				struct fib6_info **rt_arr, unsigned int nrt6)
6807 {
6808 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6809 	int err, i;
6810 
6811 	for (i = 0; i < nrt6; i++) {
6812 		mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
6813 		if (IS_ERR(mlxsw_sp_rt6)) {
6814 			err = PTR_ERR(mlxsw_sp_rt6);
6815 			goto err_rt6_unwind;
6816 		}
6817 
6818 		list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
6819 		fib6_entry->nrt6++;
6820 	}
6821 
6822 	err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
6823 	if (err)
6824 		goto err_rt6_unwind;
6825 
6826 	return 0;
6827 
6828 err_rt6_unwind:
6829 	for (; i > 0; i--) {
6830 		fib6_entry->nrt6--;
6831 		mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
6832 					       struct mlxsw_sp_rt6, list);
6833 		list_del(&mlxsw_sp_rt6->list);
6834 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
6835 	}
6836 	return err;
6837 }
6838 
6839 static void
6840 mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
6841 				struct mlxsw_sp_fib6_entry *fib6_entry,
6842 				struct fib6_info **rt_arr, unsigned int nrt6)
6843 {
6844 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6845 	int i;
6846 
6847 	for (i = 0; i < nrt6; i++) {
6848 		mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry,
6849 							   rt_arr[i]);
6850 		if (WARN_ON_ONCE(!mlxsw_sp_rt6))
6851 			continue;
6852 
6853 		fib6_entry->nrt6--;
6854 		list_del(&mlxsw_sp_rt6->list);
6855 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
6856 	}
6857 
6858 	mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
6859 }
6860 
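/* RTF_LOCAL routes trap packets to the CPU by default. If the address
 * matches an IP-in-IP or NVE decapsulation configuration, packets are
 * instead decapsulated in hardware.
 */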
6861 static int
6862 mlxsw_sp_fib6_entry_type_set_local(struct mlxsw_sp *mlxsw_sp,
6863 				   struct mlxsw_sp_fib_entry *fib_entry,
6864 				   const struct fib6_info *rt)
6865 {
6866 	struct mlxsw_sp_nexthop_group_info *nhgi = fib_entry->nh_group->nhgi;
6867 	union mlxsw_sp_l3addr dip = { .addr6 = rt->fib6_dst.addr };
6868 	u32 tb_id = mlxsw_sp_fix_tb_id(rt->fib6_table->tb6_id);
6869 	struct mlxsw_sp_router *router = mlxsw_sp->router;
6870 	int ifindex = nhgi->nexthops[0].ifindex;
6871 	struct mlxsw_sp_ipip_entry *ipip_entry;
6872 
6873 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
6874 	ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, ifindex,
6875 						       MLXSW_SP_L3_PROTO_IPV6,
6876 						       dip);
6877 
6878 	if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
6879 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
6880 		return mlxsw_sp_fib_entry_decap_init(mlxsw_sp, fib_entry,
6881 						     ipip_entry);
6882 	}
6883 	if (mlxsw_sp_router_nve_is_decap(mlxsw_sp, tb_id,
6884 					 MLXSW_SP_L3_PROTO_IPV6, &dip)) {
6885 		u32 tunnel_index;
6886 
6887 		tunnel_index = router->nve_decap_config.tunnel_index;
6888 		fib_entry->decap.tunnel_index = tunnel_index;
6889 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
6890 	}
6891 
6892 	return 0;
6893 }
6894 
6895 static int mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
6896 					struct mlxsw_sp_fib_entry *fib_entry,
6897 					const struct fib6_info *rt)
6898 {
6899 	if (rt->fib6_flags & RTF_LOCAL)
6900 		return mlxsw_sp_fib6_entry_type_set_local(mlxsw_sp, fib_entry,
6901 							  rt);
6902 	if (rt->fib6_flags & RTF_ANYCAST)
6903 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
6904 	else if (rt->fib6_type == RTN_BLACKHOLE)
6905 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
6906 	else if (rt->fib6_flags & RTF_REJECT)
6907 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE;
6908 	else if (fib_entry->nh_group->nhgi->gateway)
6909 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
6910 	else
6911 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
6912 
6913 	return 0;
6914 }
6915 
6916 static void
6917 mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
6918 {
6919 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
6920 
6921 	list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
6922 				 list) {
6923 		fib6_entry->nrt6--;
6924 		list_del(&mlxsw_sp_rt6->list);
6925 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
6926 	}
6927 }
6928 
6929 static struct mlxsw_sp_fib6_entry *
6930 mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
6931 			   struct mlxsw_sp_fib_node *fib_node,
6932 			   struct fib6_info **rt_arr, unsigned int nrt6)
6933 {
6934 	struct mlxsw_sp_fib6_entry *fib6_entry;
6935 	struct mlxsw_sp_fib_entry *fib_entry;
6936 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6937 	int err, i;
6938 
6939 	fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
6940 	if (!fib6_entry)
6941 		return ERR_PTR(-ENOMEM);
6942 	fib_entry = &fib6_entry->common;
6943 
6944 	INIT_LIST_HEAD(&fib6_entry->rt6_list);
6945 
6946 	for (i = 0; i < nrt6; i++) {
6947 		mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
6948 		if (IS_ERR(mlxsw_sp_rt6)) {
6949 			err = PTR_ERR(mlxsw_sp_rt6);
6950 			goto err_rt6_unwind;
6951 		}
6952 		list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
6953 		fib6_entry->nrt6++;
6954 	}
6955 
6956 	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
6957 	if (err)
6958 		goto err_rt6_unwind;
6959 
6960 	err = mlxsw_sp_nexthop_group_vr_link(fib_entry->nh_group,
6961 					     fib_node->fib);
6962 	if (err)
6963 		goto err_nexthop_group_vr_link;
6964 
6965 	err = mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, rt_arr[0]);
6966 	if (err)
6967 		goto err_fib6_entry_type_set;
6968 
6969 	fib_entry->fib_node = fib_node;
6970 
6971 	return fib6_entry;
6972 
6973 err_fib6_entry_type_set:
6974 	mlxsw_sp_nexthop_group_vr_unlink(fib_entry->nh_group, fib_node->fib);
6975 err_nexthop_group_vr_link:
6976 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, fib_entry);
6977 err_rt6_unwind:
6978 	for (; i > 0; i--) {
6979 		fib6_entry->nrt6--;
6980 		mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
6981 					       struct mlxsw_sp_rt6, list);
6982 		list_del(&mlxsw_sp_rt6->list);
6983 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
6984 	}
6985 	kfree(fib6_entry);
6986 	return ERR_PTR(err);
6987 }
6988 
6989 static void
6990 mlxsw_sp_fib6_entry_type_unset(struct mlxsw_sp *mlxsw_sp,
6991 			       struct mlxsw_sp_fib6_entry *fib6_entry)
6992 {
6993 	mlxsw_sp_fib_entry_type_unset(mlxsw_sp, &fib6_entry->common);
6994 }
6995 
6996 static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
6997 					struct mlxsw_sp_fib6_entry *fib6_entry)
6998 {
6999 	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
7000 
7001 	mlxsw_sp_fib6_entry_type_unset(mlxsw_sp, fib6_entry);
7002 	mlxsw_sp_nexthop_group_vr_unlink(fib6_entry->common.nh_group,
7003 					 fib_node->fib);
7004 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
7005 	mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
7006 	WARN_ON(fib6_entry->nrt6);
7007 	kfree(fib6_entry);
7008 }
7009 
7010 static struct mlxsw_sp_fib6_entry *
7011 mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
7012 			   const struct fib6_info *rt)
7013 {
7014 	struct mlxsw_sp_fib6_entry *fib6_entry;
7015 	struct mlxsw_sp_fib_node *fib_node;
7016 	struct mlxsw_sp_fib *fib;
7017 	struct fib6_info *cmp_rt;
7018 	struct mlxsw_sp_vr *vr;
7019 
7020 	vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id);
7021 	if (!vr)
7022 		return NULL;
7023 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
7024 
7025 	fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->fib6_dst.addr,
7026 					    sizeof(rt->fib6_dst.addr),
7027 					    rt->fib6_dst.plen);
7028 	if (!fib_node)
7029 		return NULL;
7030 
7031 	fib6_entry = container_of(fib_node->fib_entry,
7032 				  struct mlxsw_sp_fib6_entry, common);
7033 	cmp_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
7034 	if (rt->fib6_table->tb6_id == cmp_rt->fib6_table->tb6_id &&
7035 	    rt->fib6_metric == cmp_rt->fib6_metric &&
7036 	    mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
7037 		return fib6_entry;
7038 
7039 	return NULL;
7040 }
7041 
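/* Same rationale as mlxsw_sp_fib4_allow_replace(): a main table route
 * must not replace an offloaded local table route.
 */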
7042 static bool mlxsw_sp_fib6_allow_replace(struct mlxsw_sp_fib6_entry *fib6_entry)
7043 {
7044 	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
7045 	struct mlxsw_sp_fib6_entry *fib6_replaced;
7046 	struct fib6_info *rt, *rt_replaced;
7047 
7048 	if (!fib_node->fib_entry)
7049 		return true;
7050 
7051 	fib6_replaced = container_of(fib_node->fib_entry,
7052 				     struct mlxsw_sp_fib6_entry,
7053 				     common);
7054 	rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
7055 	rt_replaced = mlxsw_sp_fib6_entry_rt(fib6_replaced);
7056 	if (rt->fib6_table->tb6_id == RT_TABLE_MAIN &&
7057 	    rt_replaced->fib6_table->tb6_id == RT_TABLE_LOCAL)
7058 		return false;
7059 
7060 	return true;
7061 }
7062 
7063 static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp,
7064 					struct fib6_info **rt_arr,
7065 					unsigned int nrt6)
7066 {
7067 	struct mlxsw_sp_fib6_entry *fib6_entry, *fib6_replaced;
7068 	struct mlxsw_sp_fib_entry *replaced;
7069 	struct mlxsw_sp_fib_node *fib_node;
7070 	struct fib6_info *rt = rt_arr[0];
7071 	int err;
7072 
7073 	if (rt->fib6_src.plen)
7074 		return -EINVAL;
7075 
7076 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
7077 		return 0;
7078 
7079 	if (rt->nh && !mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, rt->nh->id))
7080 		return 0;
7081 
7082 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
7083 					 &rt->fib6_dst.addr,
7084 					 sizeof(rt->fib6_dst.addr),
7085 					 rt->fib6_dst.plen,
7086 					 MLXSW_SP_L3_PROTO_IPV6);
7087 	if (IS_ERR(fib_node))
7088 		return PTR_ERR(fib_node);
7089 
7090 	fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt_arr,
7091 						nrt6);
7092 	if (IS_ERR(fib6_entry)) {
7093 		err = PTR_ERR(fib6_entry);
7094 		goto err_fib6_entry_create;
7095 	}
7096 
7097 	if (!mlxsw_sp_fib6_allow_replace(fib6_entry)) {
7098 		mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
7099 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7100 		return 0;
7101 	}
7102 
7103 	replaced = fib_node->fib_entry;
7104 	err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib6_entry->common);
7105 	if (err)
7106 		goto err_fib_node_entry_link;
7107 
7108 	/* Nothing to replace */
7109 	if (!replaced)
7110 		return 0;
7111 
7112 	mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced);
7113 	fib6_replaced = container_of(replaced, struct mlxsw_sp_fib6_entry,
7114 				     common);
7115 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_replaced);
7116 
7117 	return 0;
7118 
7119 err_fib_node_entry_link:
7120 	fib_node->fib_entry = replaced;
7121 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
7122 err_fib6_entry_create:
7123 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7124 	return err;
7125 }
7126 
7127 static int mlxsw_sp_router_fib6_append(struct mlxsw_sp *mlxsw_sp,
7128 				       struct fib6_info **rt_arr,
7129 				       unsigned int nrt6)
7130 {
7131 	struct mlxsw_sp_fib6_entry *fib6_entry;
7132 	struct mlxsw_sp_fib_node *fib_node;
7133 	struct fib6_info *rt = rt_arr[0];
7134 	int err;
7135 
7136 	if (rt->fib6_src.plen)
7137 		return -EINVAL;
7138 
7139 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
7140 		return 0;
7141 
7142 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
7143 					 &rt->fib6_dst.addr,
7144 					 sizeof(rt->fib6_dst.addr),
7145 					 rt->fib6_dst.plen,
7146 					 MLXSW_SP_L3_PROTO_IPV6);
7147 	if (IS_ERR(fib_node))
7148 		return PTR_ERR(fib_node);
7149 
7150 	if (WARN_ON_ONCE(!fib_node->fib_entry)) {
7151 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7152 		return -EINVAL;
7153 	}
7154 
7155 	fib6_entry = container_of(fib_node->fib_entry,
7156 				  struct mlxsw_sp_fib6_entry, common);
7157 	err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt_arr,
7158 					      nrt6);
7159 	if (err)
7160 		goto err_fib6_entry_nexthop_add;
7161 
7162 	return 0;
7163 
7164 err_fib6_entry_nexthop_add:
7165 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7166 	return err;
7167 }
7168 
7169 static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
7170 				     struct fib6_info **rt_arr,
7171 				     unsigned int nrt6)
7172 {
7173 	struct mlxsw_sp_fib6_entry *fib6_entry;
7174 	struct mlxsw_sp_fib_node *fib_node;
7175 	struct fib6_info *rt = rt_arr[0];
7176 
7177 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
7178 		return;
7179 
7180 	/* Multipath routes are first added to the FIB trie and only then
7181 	 * notified. If we vetoed the addition, we will get a delete
7182 	 * notification for a route we do not have. Therefore, do not warn if
7183 	 * route was not found.
7184 	 */
7185 	fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
7186 	if (!fib6_entry)
7187 		return;
7188 
7189 	/* If not all the nexthops are deleted, then only reduce the nexthop
7190 	 * group.
7191 	 */
7192 	if (nrt6 != fib6_entry->nrt6) {
7193 		mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt_arr,
7194 						nrt6);
7195 		return;
7196 	}
7197 
7198 	fib_node = fib6_entry->common.fib_node;
7199 
7200 	mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib6_entry->common);
7201 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
7202 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7203 }
7204 
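/* IPMR and IP6MR notifications are handled by the same functions below,
 * so select the per-protocol multicast routing table of the virtual
 * router based on the notification family.
 */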
7205 static struct mlxsw_sp_mr_table *
7206 mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family)
7207 {
7208 	if (family == RTNL_FAMILY_IPMR)
7209 		return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4];
7210 	else
7211 		return vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
7212 }
7213 
7214 static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
7215 				     struct mfc_entry_notifier_info *men_info,
7216 				     bool replace)
7217 {
7218 	struct mlxsw_sp_mr_table *mrt;
7219 	struct mlxsw_sp_vr *vr;
7220 
7221 	vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
7222 	if (IS_ERR(vr))
7223 		return PTR_ERR(vr);
7224 
7225 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
7226 	return mlxsw_sp_mr_route_add(mrt, men_info->mfc, replace);
7227 }
7228 
7229 static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
7230 				      struct mfc_entry_notifier_info *men_info)
7231 {
7232 	struct mlxsw_sp_mr_table *mrt;
7233 	struct mlxsw_sp_vr *vr;
7234 
7235 	vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
7236 	if (WARN_ON(!vr))
7237 		return;
7238 
7239 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
7240 	mlxsw_sp_mr_route_del(mrt, men_info->mfc);
7241 	mlxsw_sp_vr_put(mlxsw_sp, vr);
7242 }
7243 
7244 static int
7245 mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
7246 			      struct vif_entry_notifier_info *ven_info)
7247 {
7248 	struct mlxsw_sp_mr_table *mrt;
7249 	struct mlxsw_sp_rif *rif;
7250 	struct mlxsw_sp_vr *vr;
7251 
7252 	vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
7253 	if (IS_ERR(vr))
7254 		return PTR_ERR(vr);
7255 
7256 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
7257 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
7258 	return mlxsw_sp_mr_vif_add(mrt, ven_info->dev,
7259 				   ven_info->vif_index,
7260 				   ven_info->vif_flags, rif);
7261 }
7262 
7263 static void
7264 mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
7265 			      struct vif_entry_notifier_info *ven_info)
7266 {
7267 	struct mlxsw_sp_mr_table *mrt;
7268 	struct mlxsw_sp_vr *vr;
7269 
7270 	vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
7271 	if (WARN_ON(!vr))
7272 		return;
7273 
7274 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
7275 	mlxsw_sp_mr_vif_del(mrt, ven_info->vif_index);
7276 	mlxsw_sp_vr_put(mlxsw_sp, vr);
7277 }
7278 
7279 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
7280 				     struct mlxsw_sp_fib_node *fib_node)
7281 {
7282 	struct mlxsw_sp_fib4_entry *fib4_entry;
7283 
7284 	fib4_entry = container_of(fib_node->fib_entry,
7285 				  struct mlxsw_sp_fib4_entry, common);
7286 	mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry);
7287 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
7288 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7289 }
7290 
7291 static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
7292 				     struct mlxsw_sp_fib_node *fib_node)
7293 {
7294 	struct mlxsw_sp_fib6_entry *fib6_entry;
7295 
7296 	fib6_entry = container_of(fib_node->fib_entry,
7297 				  struct mlxsw_sp_fib6_entry, common);
7298 	mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry);
7299 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
7300 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7301 }
7302 
7303 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
7304 				    struct mlxsw_sp_fib_node *fib_node)
7305 {
7306 	switch (fib_node->fib->proto) {
7307 	case MLXSW_SP_L3_PROTO_IPV4:
7308 		mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
7309 		break;
7310 	case MLXSW_SP_L3_PROTO_IPV6:
7311 		mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
7312 		break;
7313 	}
7314 }
7315 
7316 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
7317 				  struct mlxsw_sp_vr *vr,
7318 				  enum mlxsw_sp_l3proto proto)
7319 {
7320 	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
7321 	struct mlxsw_sp_fib_node *fib_node, *tmp;
7322 
7323 	list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
7324 		bool do_break = &tmp->list == &fib->node_list;
7325 
7326 		mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
7327 		if (do_break)
7328 			break;
7329 	}
7330 }
7331 
7332 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
7333 {
7334 	int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
7335 	int i, j;
7336 
7337 	for (i = 0; i < max_vrs; i++) {
7338 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
7339 
7340 		if (!mlxsw_sp_vr_is_used(vr))
7341 			continue;
7342 
7343 		for (j = 0; j < MLXSW_SP_L3_PROTO_MAX; j++)
7344 			mlxsw_sp_mr_table_flush(vr->mr_table[j]);
7345 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
7346 
7347 		/* If virtual router was only used for IPv4, then it's no
7348 		 * longer used.
7349 		 */
7350 		if (!mlxsw_sp_vr_is_used(vr))
7351 			continue;
7352 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
7353 	}
7354 }
7355 
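/* FIB notifications are emitted in atomic context, so the relevant
 * payload is copied into a work item and the actual processing is
 * deferred to process context, where the router lock can be taken (see
 * mlxsw_sp_router_fib_event()).
 */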
7356 struct mlxsw_sp_fib6_event_work {
7357 	struct fib6_info **rt_arr;
7358 	unsigned int nrt6;
7359 };
7360 
7361 struct mlxsw_sp_fib_event_work {
7362 	struct work_struct work;
7363 	union {
7364 		struct mlxsw_sp_fib6_event_work fib6_work;
7365 		struct fib_entry_notifier_info fen_info;
7366 		struct fib_rule_notifier_info fr_info;
7367 		struct fib_nh_notifier_info fnh_info;
7368 		struct mfc_entry_notifier_info men_info;
7369 		struct vif_entry_notifier_info ven_info;
7370 	};
7371 	struct mlxsw_sp *mlxsw_sp;
7372 	unsigned long event;
7373 };
7374 
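/* Copy the route and its siblings into an array and take a reference on
 * each, so that they remain valid until the scheduled work item has
 * processed the event. The references are dropped by
 * mlxsw_sp_router_fib6_work_fini().
 */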
7375 static int
7376 mlxsw_sp_router_fib6_work_init(struct mlxsw_sp_fib6_event_work *fib6_work,
7377 			       struct fib6_entry_notifier_info *fen6_info)
7378 {
7379 	struct fib6_info *rt = fen6_info->rt;
7380 	struct fib6_info **rt_arr;
7381 	struct fib6_info *iter;
7382 	unsigned int nrt6;
7383 	int i = 0;
7384 
7385 	nrt6 = fen6_info->nsiblings + 1;
7386 
7387 	rt_arr = kcalloc(nrt6, sizeof(struct fib6_info *), GFP_ATOMIC);
7388 	if (!rt_arr)
7389 		return -ENOMEM;
7390 
7391 	fib6_work->rt_arr = rt_arr;
7392 	fib6_work->nrt6 = nrt6;
7393 
7394 	rt_arr[0] = rt;
7395 	fib6_info_hold(rt);
7396 
7397 	if (!fen6_info->nsiblings)
7398 		return 0;
7399 
7400 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
7401 		if (i == fen6_info->nsiblings)
7402 			break;
7403 
7404 		rt_arr[i + 1] = iter;
7405 		fib6_info_hold(iter);
7406 		i++;
7407 	}
7408 	WARN_ON_ONCE(i != fen6_info->nsiblings);
7409 
7410 	return 0;
7411 }
7412 
7413 static void
7414 mlxsw_sp_router_fib6_work_fini(struct mlxsw_sp_fib6_event_work *fib6_work)
7415 {
7416 	int i;
7417 
7418 	for (i = 0; i < fib6_work->nrt6; i++)
7419 		mlxsw_sp_rt6_release(fib6_work->rt_arr[i]);
7420 	kfree(fib6_work->rt_arr);
7421 }
7422 
7423 static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
7424 {
7425 	struct mlxsw_sp_fib_event_work *fib_work =
7426 		container_of(work, struct mlxsw_sp_fib_event_work, work);
7427 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
7428 	int err;
7429 
7430 	mutex_lock(&mlxsw_sp->router->lock);
7431 	mlxsw_sp_span_respin(mlxsw_sp);
7432 
7433 	switch (fib_work->event) {
7434 	case FIB_EVENT_ENTRY_REPLACE:
7435 		err = mlxsw_sp_router_fib4_replace(mlxsw_sp,
7436 						   &fib_work->fen_info);
7437 		if (err) {
7438 			dev_warn(mlxsw_sp->bus_info->dev, "FIB replace failed.\n");
7439 			mlxsw_sp_fib4_offload_failed_flag_set(mlxsw_sp,
7440 							      &fib_work->fen_info);
7441 		}
7442 		fib_info_put(fib_work->fen_info.fi);
7443 		break;
7444 	case FIB_EVENT_ENTRY_DEL:
7445 		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
7446 		fib_info_put(fib_work->fen_info.fi);
7447 		break;
7448 	case FIB_EVENT_NH_ADD:
7449 	case FIB_EVENT_NH_DEL:
7450 		mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
7451 					fib_work->fnh_info.fib_nh);
7452 		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
7453 		break;
7454 	}
7455 	mutex_unlock(&mlxsw_sp->router->lock);
7456 	kfree(fib_work);
7457 }
7458 
7459 static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
7460 {
7461 	struct mlxsw_sp_fib_event_work *fib_work =
7462 		    container_of(work, struct mlxsw_sp_fib_event_work, work);
7463 	struct mlxsw_sp_fib6_event_work *fib6_work = &fib_work->fib6_work;
7464 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
7465 	int err;
7466 
7467 	mutex_lock(&mlxsw_sp->router->lock);
7468 	mlxsw_sp_span_respin(mlxsw_sp);
7469 
7470 	switch (fib_work->event) {
7471 	case FIB_EVENT_ENTRY_REPLACE:
7472 		err = mlxsw_sp_router_fib6_replace(mlxsw_sp,
7473 						   fib6_work->rt_arr,
7474 						   fib6_work->nrt6);
7475 		if (err) {
7476 			dev_warn(mlxsw_sp->bus_info->dev, "FIB replace failed.\n");
7477 			mlxsw_sp_fib6_offload_failed_flag_set(mlxsw_sp,
7478 							      fib6_work->rt_arr,
7479 							      fib6_work->nrt6);
7480 		}
7481 		mlxsw_sp_router_fib6_work_fini(fib6_work);
7482 		break;
7483 	case FIB_EVENT_ENTRY_APPEND:
7484 		err = mlxsw_sp_router_fib6_append(mlxsw_sp,
7485 						  fib6_work->rt_arr,
7486 						  fib6_work->nrt6);
7487 		if (err) {
7488 			dev_warn(mlxsw_sp->bus_info->dev, "FIB append failed.\n");
7489 			mlxsw_sp_fib6_offload_failed_flag_set(mlxsw_sp,
7490 							      fib6_work->rt_arr,
7491 							      fib6_work->nrt6);
7492 		}
7493 		mlxsw_sp_router_fib6_work_fini(fib6_work);
7494 		break;
7495 	case FIB_EVENT_ENTRY_DEL:
7496 		mlxsw_sp_router_fib6_del(mlxsw_sp,
7497 					 fib6_work->rt_arr,
7498 					 fib6_work->nrt6);
7499 		mlxsw_sp_router_fib6_work_fini(fib6_work);
7500 		break;
7501 	}
7502 	mutex_unlock(&mlxsw_sp->router->lock);
7503 	kfree(fib_work);
7504 }
7505 
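/* Unlike the IPv4 and IPv6 handlers above, multicast route and VIF events
 * are processed under RTNL in addition to the router lock; the RTNL
 * appears to still be required by the common multicast routing code.
 */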
7506 static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
7507 {
7508 	struct mlxsw_sp_fib_event_work *fib_work =
7509 		container_of(work, struct mlxsw_sp_fib_event_work, work);
7510 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
7511 	bool replace;
7512 	int err;
7513 
7514 	rtnl_lock();
7515 	mutex_lock(&mlxsw_sp->router->lock);
7516 	switch (fib_work->event) {
7517 	case FIB_EVENT_ENTRY_REPLACE:
7518 	case FIB_EVENT_ENTRY_ADD:
7519 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
7520 
7521 		err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
7522 						replace);
7523 		if (err)
7524 			dev_warn(mlxsw_sp->bus_info->dev, "MR entry add failed.\n");
7525 		mr_cache_put(fib_work->men_info.mfc);
7526 		break;
7527 	case FIB_EVENT_ENTRY_DEL:
7528 		mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
7529 		mr_cache_put(fib_work->men_info.mfc);
7530 		break;
7531 	case FIB_EVENT_VIF_ADD:
7532 		err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
7533 						    &fib_work->ven_info);
7534 		if (err)
7535 			dev_warn(mlxsw_sp->bus_info->dev, "MR VIF add failed.\n");
7536 		dev_put(fib_work->ven_info.dev);
7537 		break;
7538 	case FIB_EVENT_VIF_DEL:
7539 		mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
7540 					      &fib_work->ven_info);
7541 		dev_put(fib_work->ven_info.dev);
7542 		break;
7543 	}
7544 	mutex_unlock(&mlxsw_sp->router->lock);
7545 	rtnl_unlock();
7546 	kfree(fib_work);
7547 }
7548 
7549 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
7550 				       struct fib_notifier_info *info)
7551 {
7552 	struct fib_entry_notifier_info *fen_info;
7553 	struct fib_nh_notifier_info *fnh_info;
7554 
7555 	switch (fib_work->event) {
7556 	case FIB_EVENT_ENTRY_REPLACE:
7557 	case FIB_EVENT_ENTRY_DEL:
7558 		fen_info = container_of(info, struct fib_entry_notifier_info,
7559 					info);
7560 		fib_work->fen_info = *fen_info;
7561 		/* Take reference on fib_info to prevent it from being
7562 		 * freed while work is queued. Release it afterwards.
7563 		 */
7564 		fib_info_hold(fib_work->fen_info.fi);
7565 		break;
7566 	case FIB_EVENT_NH_ADD:
7567 	case FIB_EVENT_NH_DEL:
7568 		fnh_info = container_of(info, struct fib_nh_notifier_info,
7569 					info);
7570 		fib_work->fnh_info = *fnh_info;
7571 		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
7572 		break;
7573 	}
7574 }
7575 
7576 static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
7577 				      struct fib_notifier_info *info)
7578 {
7579 	struct fib6_entry_notifier_info *fen6_info;
7580 	int err;
7581 
7582 	switch (fib_work->event) {
7583 	case FIB_EVENT_ENTRY_REPLACE:
7584 	case FIB_EVENT_ENTRY_APPEND:
7585 	case FIB_EVENT_ENTRY_DEL:
7586 		fen6_info = container_of(info, struct fib6_entry_notifier_info,
7587 					 info);
7588 		err = mlxsw_sp_router_fib6_work_init(&fib_work->fib6_work,
7589 						     fen6_info);
7590 		if (err)
7591 			return err;
7592 		break;
7593 	}
7594 
7595 	return 0;
7596 }
7597 
7598 static void
7599 mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
7600 			    struct fib_notifier_info *info)
7601 {
7602 	switch (fib_work->event) {
7603 	case FIB_EVENT_ENTRY_REPLACE:
7604 	case FIB_EVENT_ENTRY_ADD:
7605 	case FIB_EVENT_ENTRY_DEL:
7606 		memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
7607 		mr_cache_hold(fib_work->men_info.mfc);
7608 		break;
7609 	case FIB_EVENT_VIF_ADD:
7610 	case FIB_EVENT_VIF_DEL:
7611 		memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
7612 		dev_hold(fib_work->ven_info.dev);
7613 		break;
7614 	}
7615 }
7616 
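/* FIB rules are not reflected to the device. Only the default rules and
 * l3mdev (VRF) rules are allowed; anything else is vetoed with an extack
 * message.
 */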
7617 static int mlxsw_sp_router_fib_rule_event(unsigned long event,
7618 					  struct fib_notifier_info *info,
7619 					  struct mlxsw_sp *mlxsw_sp)
7620 {
7621 	struct netlink_ext_ack *extack = info->extack;
7622 	struct fib_rule_notifier_info *fr_info;
7623 	struct fib_rule *rule;
7624 	int err = 0;
7625 
7626 	/* nothing to do at the moment */
7627 	if (event == FIB_EVENT_RULE_DEL)
7628 		return 0;
7629 
7630 	fr_info = container_of(info, struct fib_rule_notifier_info, info);
7631 	rule = fr_info->rule;
7632 
7633 	/* Rule only affects locally generated traffic */
7634 	if (rule->iifindex == mlxsw_sp_net(mlxsw_sp)->loopback_dev->ifindex)
7635 		return 0;
7636 
7637 	switch (info->family) {
7638 	case AF_INET:
7639 		if (!fib4_rule_default(rule) && !rule->l3mdev)
7640 			err = -EOPNOTSUPP;
7641 		break;
7642 	case AF_INET6:
7643 		if (!fib6_rule_default(rule) && !rule->l3mdev)
7644 			err = -EOPNOTSUPP;
7645 		break;
7646 	case RTNL_FAMILY_IPMR:
7647 		if (!ipmr_rule_default(rule) && !rule->l3mdev)
7648 			err = -EOPNOTSUPP;
7649 		break;
7650 	case RTNL_FAMILY_IP6MR:
7651 		if (!ip6mr_rule_default(rule) && !rule->l3mdev)
7652 			err = -EOPNOTSUPP;
7653 		break;
7654 	}
7655 
7656 	if (err < 0)
7657 		NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported");
7658 
7659 	return err;
7660 }
7661 
7662 /* Called with rcu_read_lock() */
7663 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
7664 				     unsigned long event, void *ptr)
7665 {
7666 	struct mlxsw_sp_fib_event_work *fib_work;
7667 	struct fib_notifier_info *info = ptr;
7668 	struct mlxsw_sp_router *router;
7669 	int err;
7670 
	if (info->family != AF_INET && info->family != AF_INET6 &&
	    info->family != RTNL_FAMILY_IPMR &&
	    info->family != RTNL_FAMILY_IP6MR)
		return NOTIFY_DONE;
7675 
7676 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
7677 
7678 	switch (event) {
7679 	case FIB_EVENT_RULE_ADD:
7680 	case FIB_EVENT_RULE_DEL:
7681 		err = mlxsw_sp_router_fib_rule_event(event, info,
7682 						     router->mlxsw_sp);
7683 		return notifier_from_errno(err);
7684 	case FIB_EVENT_ENTRY_ADD:
7685 	case FIB_EVENT_ENTRY_REPLACE:
7686 	case FIB_EVENT_ENTRY_APPEND:
7687 		if (info->family == AF_INET) {
7688 			struct fib_entry_notifier_info *fen_info = ptr;
7689 
7690 			if (fen_info->fi->fib_nh_is_v6) {
7691 				NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported");
7692 				return notifier_from_errno(-EINVAL);
7693 			}
7694 		}
7695 		break;
7696 	}
7697 
7698 	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
7699 	if (!fib_work)
7700 		return NOTIFY_BAD;
7701 
7702 	fib_work->mlxsw_sp = router->mlxsw_sp;
7703 	fib_work->event = event;
7704 
7705 	switch (info->family) {
7706 	case AF_INET:
7707 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
7708 		mlxsw_sp_router_fib4_event(fib_work, info);
7709 		break;
7710 	case AF_INET6:
7711 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
7712 		err = mlxsw_sp_router_fib6_event(fib_work, info);
7713 		if (err)
7714 			goto err_fib_event;
7715 		break;
7716 	case RTNL_FAMILY_IP6MR:
7717 	case RTNL_FAMILY_IPMR:
7718 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
7719 		mlxsw_sp_router_fibmr_event(fib_work, info);
7720 		break;
7721 	}
7722 
7723 	mlxsw_core_schedule_work(&fib_work->work);
7724 
7725 	return NOTIFY_DONE;
7726 
7727 err_fib_event:
7728 	kfree(fib_work);
7729 	return NOTIFY_BAD;
7730 }
7731 
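/* A simple linear walk over the RIF array. Callers are expected to hold
 * the router lock, which protects the array.
 */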
7732 static struct mlxsw_sp_rif *
7733 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
7734 			 const struct net_device *dev)
7735 {
7736 	int max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7737 	int i;
7738 
7739 	for (i = 0; i < max_rifs; i++)
7740 		if (mlxsw_sp->router->rifs[i] &&
7741 		    mlxsw_sp_rif_dev_is(mlxsw_sp->router->rifs[i], dev))
7742 			return mlxsw_sp->router->rifs[i];
7743 
7744 	return NULL;
7745 }
7746 
7747 bool mlxsw_sp_rif_exists(struct mlxsw_sp *mlxsw_sp,
7748 			 const struct net_device *dev)
7749 {
7750 	struct mlxsw_sp_rif *rif;
7751 
7752 	mutex_lock(&mlxsw_sp->router->lock);
7753 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7754 	mutex_unlock(&mlxsw_sp->router->lock);
7755 
	return rif != NULL;
7757 }
7758 
7759 u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev)
7760 {
7761 	struct mlxsw_sp_rif *rif;
7762 	u16 vid = 0;
7763 
7764 	mutex_lock(&mlxsw_sp->router->lock);
7765 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7766 	if (!rif)
7767 		goto out;
7768 
7769 	/* We only return the VID for VLAN RIFs. Otherwise we return an
7770 	 * invalid value (0).
7771 	 */
7772 	if (rif->ops->type != MLXSW_SP_RIF_TYPE_VLAN)
7773 		goto out;
7774 
7775 	vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7776 
7777 out:
7778 	mutex_unlock(&mlxsw_sp->router->lock);
7779 	return vid;
7780 }
7781 
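/* Disable a RIF using a read-modify-write of its RITR register: query the
 * current configuration and write it back with the enable bit cleared.
 */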
7782 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
7783 {
7784 	char ritr_pl[MLXSW_REG_RITR_LEN];
7785 	int err;
7786 
7787 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
7788 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7789 	if (err)
7790 		return err;
7791 
7792 	mlxsw_reg_ritr_enable_set(ritr_pl, false);
7793 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7794 }
7795 
7796 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
7797 					  struct mlxsw_sp_rif *rif)
7798 {
7799 	mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
7800 	mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
7801 	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
7802 }
7803 
7804 static bool __mlxsw_sp_dev_addr_list_empty(const struct net_device *dev)
7805 {
7806 	struct inet6_dev *inet6_dev;
7807 	struct in_device *idev;
7808 
7809 	idev = __in_dev_get_rcu(dev);
7810 	if (idev && idev->ifa_list)
7811 		return false;
7812 
7813 	inet6_dev = __in6_dev_get(dev);
7814 	if (inet6_dev && !list_empty(&inet6_dev->addr_list))
7815 		return false;
7816 
7817 	return true;
7818 }
7819 
7820 static bool mlxsw_sp_dev_addr_list_empty(const struct net_device *dev)
7821 {
7822 	bool addr_list_empty;
7823 
7824 	rcu_read_lock();
7825 	addr_list_empty = __mlxsw_sp_dev_addr_list_empty(dev);
7826 	rcu_read_unlock();
7827 
7828 	return addr_list_empty;
7829 }
7830 
7831 static bool
7832 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
7833 			   unsigned long event)
7834 {
7835 	bool addr_list_empty;
7836 
7837 	switch (event) {
7838 	case NETDEV_UP:
7839 		return rif == NULL;
7840 	case NETDEV_DOWN:
7841 		addr_list_empty = mlxsw_sp_dev_addr_list_empty(dev);
7842 
		/* macvlans do not have a RIF, but rather piggyback on the
		 * RIF of their lower device.
		 */
7846 		if (netif_is_macvlan(dev) && addr_list_empty)
7847 			return true;
7848 
7849 		if (rif && addr_list_empty &&
7850 		    !netif_is_l3_slave(mlxsw_sp_rif_dev(rif)))
7851 			return true;
7852 		/* It is possible we already removed the RIF ourselves
7853 		 * if it was assigned to a netdev that is now a bridge
7854 		 * or LAG slave.
7855 		 */
7856 		return false;
7857 	}
7858 
7859 	return false;
7860 }
7861 
7862 static enum mlxsw_sp_rif_type
7863 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
7864 		      const struct net_device *dev)
7865 {
7866 	enum mlxsw_sp_fid_type type;
7867 
7868 	if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
7869 		return MLXSW_SP_RIF_TYPE_IPIP_LB;
7870 
7871 	/* Otherwise RIF type is derived from the type of the underlying FID. */
7872 	if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
7873 		type = MLXSW_SP_FID_TYPE_8021Q;
7874 	else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
7875 		type = MLXSW_SP_FID_TYPE_8021Q;
7876 	else if (netif_is_bridge_master(dev))
7877 		type = MLXSW_SP_FID_TYPE_8021D;
7878 	else
7879 		type = MLXSW_SP_FID_TYPE_RFID;
7880 
7881 	return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
7882 }
7883 
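/* RIF indexes are allocated from a genpool. gen_pool_alloc() returns 0 on
 * failure, so the pool is biased by MLXSW_SP_ROUTER_GENALLOC_OFFSET to
 * keep index zero allocatable; rebase the returned value back here.
 */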
7884 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index,
7885 				    u8 rif_entries)
7886 {
7887 	*p_rif_index = gen_pool_alloc(mlxsw_sp->router->rifs_table,
7888 				      rif_entries);
7889 	if (*p_rif_index == 0)
7890 		return -ENOBUFS;
7891 	*p_rif_index -= MLXSW_SP_ROUTER_GENALLOC_OFFSET;
7892 
7893 	/* RIF indexes must be aligned to the allocation size. */
7894 	WARN_ON_ONCE(*p_rif_index % rif_entries);
7895 
7896 	return 0;
7897 }
7898 
7899 static void mlxsw_sp_rif_index_free(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
7900 				    u8 rif_entries)
7901 {
7902 	gen_pool_free(mlxsw_sp->router->rifs_table,
7903 		      MLXSW_SP_ROUTER_GENALLOC_OFFSET + rif_index, rif_entries);
7904 }
7905 
7906 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
7907 					       u16 vr_id,
7908 					       struct net_device *l3_dev)
7909 {
7910 	struct mlxsw_sp_rif *rif;
7911 
7912 	rif = kzalloc(rif_size, GFP_KERNEL);
7913 	if (!rif)
7914 		return NULL;
7915 
7916 	INIT_LIST_HEAD(&rif->nexthop_list);
7917 	INIT_LIST_HEAD(&rif->neigh_list);
7918 	if (l3_dev) {
7919 		ether_addr_copy(rif->addr, l3_dev->dev_addr);
7920 		rif->mtu = l3_dev->mtu;
7921 		rif->dev = l3_dev;
7922 	}
7923 	rif->vr_id = vr_id;
7924 	rif->rif_index = rif_index;
7925 
7926 	return rif;
7927 }
7928 
7929 static void mlxsw_sp_rif_free(struct mlxsw_sp_rif *rif)
7930 {
7931 	WARN_ON(!list_empty(&rif->neigh_list));
7932 	WARN_ON(!list_empty(&rif->nexthop_list));
7933 	kfree(rif);
7934 }
7935 
7936 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
7937 					   u16 rif_index)
7938 {
7939 	return mlxsw_sp->router->rifs[rif_index];
7940 }
7941 
7942 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
7943 {
7944 	return rif->rif_index;
7945 }
7946 
7947 u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
7948 {
7949 	return lb_rif->common.rif_index;
7950 }
7951 
7952 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
7953 {
7954 	struct net_device *dev = mlxsw_sp_rif_dev(&lb_rif->common);
7955 	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(dev);
7956 	struct mlxsw_sp_vr *ul_vr;
7957 
7958 	ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL);
7959 	if (WARN_ON(IS_ERR(ul_vr)))
7960 		return 0;
7961 
7962 	return ul_vr->id;
7963 }
7964 
7965 u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
7966 {
7967 	return lb_rif->ul_rif_id;
7968 }
7969 
7970 static bool
7971 mlxsw_sp_router_port_l3_stats_enabled(struct mlxsw_sp_rif *rif)
7972 {
7973 	return mlxsw_sp_rif_counter_valid_get(rif,
7974 					      MLXSW_SP_RIF_COUNTER_EGRESS) &&
7975 	       mlxsw_sp_rif_counter_valid_get(rif,
7976 					      MLXSW_SP_RIF_COUNTER_INGRESS);
7977 }
7978 
7979 static int
7980 mlxsw_sp_router_port_l3_stats_enable(struct mlxsw_sp_rif *rif)
7981 {
7982 	int err;
7983 
7984 	err = mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_INGRESS);
7985 	if (err)
7986 		return err;
7987 
7988 	/* Clear stale data. */
7989 	err = mlxsw_sp_rif_counter_fetch_clear(rif,
7990 					       MLXSW_SP_RIF_COUNTER_INGRESS,
7991 					       NULL);
7992 	if (err)
7993 		goto err_clear_ingress;
7994 
7995 	err = mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
7996 	if (err)
7997 		goto err_alloc_egress;
7998 
7999 	/* Clear stale data. */
8000 	err = mlxsw_sp_rif_counter_fetch_clear(rif,
8001 					       MLXSW_SP_RIF_COUNTER_EGRESS,
8002 					       NULL);
8003 	if (err)
8004 		goto err_clear_egress;
8005 
8006 	return 0;
8007 
8008 err_clear_egress:
8009 	mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
8010 err_alloc_egress:
8011 err_clear_ingress:
8012 	mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_INGRESS);
8013 	return err;
8014 }
8015 
8016 static void
8017 mlxsw_sp_router_port_l3_stats_disable(struct mlxsw_sp_rif *rif)
8018 {
8019 	mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
8020 	mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_INGRESS);
8021 }
8022 
8023 static void
8024 mlxsw_sp_router_port_l3_stats_report_used(struct mlxsw_sp_rif *rif,
8025 					  struct netdev_notifier_offload_xstats_info *info)
8026 {
8027 	if (!mlxsw_sp_router_port_l3_stats_enabled(rif))
8028 		return;
8029 	netdev_offload_xstats_report_used(info->report_used);
8030 }
8031 
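/* The counters are read using fetch-and-clear, so each call returns the
 * delta since the previous read, matching the report_delta semantics of
 * the offload xstats API.
 */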
8032 static int
8033 mlxsw_sp_router_port_l3_stats_fetch(struct mlxsw_sp_rif *rif,
8034 				    struct rtnl_hw_stats64 *p_stats)
8035 {
8036 	struct mlxsw_sp_rif_counter_set_basic ingress;
8037 	struct mlxsw_sp_rif_counter_set_basic egress;
8038 	int err;
8039 
8040 	err = mlxsw_sp_rif_counter_fetch_clear(rif,
8041 					       MLXSW_SP_RIF_COUNTER_INGRESS,
8042 					       &ingress);
8043 	if (err)
8044 		return err;
8045 
8046 	err = mlxsw_sp_rif_counter_fetch_clear(rif,
8047 					       MLXSW_SP_RIF_COUNTER_EGRESS,
8048 					       &egress);
8049 	if (err)
8050 		return err;
8051 
8052 #define MLXSW_SP_ROUTER_ALL_GOOD(SET, SFX)		\
8053 		((SET.good_unicast_ ## SFX) +		\
8054 		 (SET.good_multicast_ ## SFX) +		\
8055 		 (SET.good_broadcast_ ## SFX))
8056 
8057 	p_stats->rx_packets = MLXSW_SP_ROUTER_ALL_GOOD(ingress, packets);
8058 	p_stats->tx_packets = MLXSW_SP_ROUTER_ALL_GOOD(egress, packets);
8059 	p_stats->rx_bytes = MLXSW_SP_ROUTER_ALL_GOOD(ingress, bytes);
8060 	p_stats->tx_bytes = MLXSW_SP_ROUTER_ALL_GOOD(egress, bytes);
8061 	p_stats->rx_errors = ingress.error_packets;
8062 	p_stats->tx_errors = egress.error_packets;
8063 	p_stats->rx_dropped = ingress.discard_packets;
8064 	p_stats->tx_dropped = egress.discard_packets;
8065 	p_stats->multicast = ingress.good_multicast_packets +
8066 			     ingress.good_broadcast_packets;
8067 
8068 #undef MLXSW_SP_ROUTER_ALL_GOOD
8069 
8070 	return 0;
8071 }
8072 
8073 static int
8074 mlxsw_sp_router_port_l3_stats_report_delta(struct mlxsw_sp_rif *rif,
8075 					   struct netdev_notifier_offload_xstats_info *info)
8076 {
8077 	struct rtnl_hw_stats64 stats = {};
8078 	int err;
8079 
8080 	if (!mlxsw_sp_router_port_l3_stats_enabled(rif))
8081 		return 0;
8082 
8083 	err = mlxsw_sp_router_port_l3_stats_fetch(rif, &stats);
8084 	if (err)
8085 		return err;
8086 
8087 	netdev_offload_xstats_report_delta(info->report_delta, &stats);
8088 	return 0;
8089 }
8090 
8091 struct mlxsw_sp_router_hwstats_notify_work {
8092 	struct work_struct work;
8093 	struct net_device *dev;
8094 };
8095 
8096 static void mlxsw_sp_router_hwstats_notify_work(struct work_struct *work)
8097 {
8098 	struct mlxsw_sp_router_hwstats_notify_work *hws_work =
8099 		container_of(work, struct mlxsw_sp_router_hwstats_notify_work,
8100 			     work);
8101 
8102 	rtnl_lock();
8103 	rtnl_offload_xstats_notify(hws_work->dev);
8104 	rtnl_unlock();
8105 	dev_put(hws_work->dev);
8106 	kfree(hws_work);
8107 }
8108 
8109 static void
8110 mlxsw_sp_router_hwstats_notify_schedule(struct net_device *dev)
8111 {
8112 	struct mlxsw_sp_router_hwstats_notify_work *hws_work;
8113 
8114 	/* To collect notification payload, the core ends up sending another
8115 	 * notifier block message, which would deadlock on the attempt to
8116 	 * acquire the router lock again. Just postpone the notification until
8117 	 * later.
8118 	 */
8119 
8120 	hws_work = kzalloc(sizeof(*hws_work), GFP_KERNEL);
8121 	if (!hws_work)
8122 		return;
8123 
8124 	INIT_WORK(&hws_work->work, mlxsw_sp_router_hwstats_notify_work);
8125 	dev_hold(dev);
8126 	hws_work->dev = dev;
8127 	mlxsw_core_schedule_work(&hws_work->work);
8128 }
8129 
8130 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
8131 {
8132 	return mlxsw_sp_rif_dev(rif)->ifindex;
8133 }
8134 
8135 bool mlxsw_sp_rif_has_dev(const struct mlxsw_sp_rif *rif)
8136 {
8137 	return !!mlxsw_sp_rif_dev(rif);
8138 }
8139 
8140 bool mlxsw_sp_rif_dev_is(const struct mlxsw_sp_rif *rif,
8141 			 const struct net_device *dev)
8142 {
8143 	return mlxsw_sp_rif_dev(rif) == dev;
8144 }
8145 
8146 static void mlxsw_sp_rif_push_l3_stats(struct mlxsw_sp_rif *rif)
8147 {
8148 	struct rtnl_hw_stats64 stats = {};
8149 
8150 	if (!mlxsw_sp_router_port_l3_stats_fetch(rif, &stats))
8151 		netdev_offload_xstats_push_delta(mlxsw_sp_rif_dev(rif),
8152 						 NETDEV_OFFLOAD_XSTATS_TYPE_L3,
8153 						 &stats);
8154 }
8155 
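/* Some RIF types consume two consecutive entries in the device's RIF
 * table (indicated by params->double_entry), in which case the index
 * allocator hands out a correspondingly sized and aligned block.
 */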
8156 static struct mlxsw_sp_rif *
8157 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
8158 		    const struct mlxsw_sp_rif_params *params,
8159 		    struct netlink_ext_ack *extack)
8160 {
8161 	u8 rif_entries = params->double_entry ? 2 : 1;
8162 	u32 tb_id = l3mdev_fib_table(params->dev);
8163 	const struct mlxsw_sp_rif_ops *ops;
8164 	struct mlxsw_sp_fid *fid = NULL;
8165 	enum mlxsw_sp_rif_type type;
8166 	struct mlxsw_sp_rif *rif;
8167 	struct mlxsw_sp_vr *vr;
8168 	u16 rif_index;
8169 	int i, err;
8170 
8171 	type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
8172 	ops = mlxsw_sp->router->rif_ops_arr[type];
8173 
8174 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
8175 	if (IS_ERR(vr))
8176 		return ERR_CAST(vr);
8177 	vr->rif_count++;
8178 
8179 	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index, rif_entries);
8180 	if (err) {
8181 		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
8182 		goto err_rif_index_alloc;
8183 	}
8184 
8185 	rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
8186 	if (!rif) {
8187 		err = -ENOMEM;
8188 		goto err_rif_alloc;
8189 	}
8190 	dev_hold(params->dev);
8191 	mlxsw_sp->router->rifs[rif_index] = rif;
8192 	rif->mlxsw_sp = mlxsw_sp;
8193 	rif->ops = ops;
8194 	rif->rif_entries = rif_entries;
8195 
8196 	if (ops->fid_get) {
8197 		fid = ops->fid_get(rif, extack);
8198 		if (IS_ERR(fid)) {
8199 			err = PTR_ERR(fid);
8200 			goto err_fid_get;
8201 		}
8202 		rif->fid = fid;
8203 	}
8204 
8205 	if (ops->setup)
8206 		ops->setup(rif, params);
8207 
8208 	err = ops->configure(rif, extack);
8209 	if (err)
8210 		goto err_configure;
8211 
8212 	for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
8213 		err = mlxsw_sp_mr_rif_add(vr->mr_table[i], rif);
8214 		if (err)
8215 			goto err_mr_rif_add;
8216 	}
8217 
8218 	if (netdev_offload_xstats_enabled(params->dev,
8219 					  NETDEV_OFFLOAD_XSTATS_TYPE_L3)) {
8220 		err = mlxsw_sp_router_port_l3_stats_enable(rif);
8221 		if (err)
8222 			goto err_stats_enable;
8223 		mlxsw_sp_router_hwstats_notify_schedule(params->dev);
8224 	} else {
8225 		mlxsw_sp_rif_counters_alloc(rif);
8226 	}
8227 
8228 	atomic_add(rif_entries, &mlxsw_sp->router->rifs_count);
8229 	return rif;
8230 
8231 err_stats_enable:
8232 err_mr_rif_add:
8233 	for (i--; i >= 0; i--)
8234 		mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
8235 	ops->deconfigure(rif);
8236 err_configure:
8237 	if (fid)
8238 		mlxsw_sp_fid_put(fid);
8239 err_fid_get:
8240 	mlxsw_sp->router->rifs[rif_index] = NULL;
8241 	dev_put(params->dev);
8242 	mlxsw_sp_rif_free(rif);
8243 err_rif_alloc:
8244 	mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries);
8245 err_rif_index_alloc:
8246 	vr->rif_count--;
8247 	mlxsw_sp_vr_put(mlxsw_sp, vr);
8248 	return ERR_PTR(err);
8249 }
8250 
8251 static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
8252 {
8253 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
8254 	const struct mlxsw_sp_rif_ops *ops = rif->ops;
8255 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8256 	struct mlxsw_sp_fid *fid = rif->fid;
8257 	u8 rif_entries = rif->rif_entries;
8258 	u16 rif_index = rif->rif_index;
8259 	struct mlxsw_sp_vr *vr;
8260 	int i;
8261 
8262 	atomic_sub(rif_entries, &mlxsw_sp->router->rifs_count);
8263 	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
8264 	vr = &mlxsw_sp->router->vrs[rif->vr_id];
8265 
8266 	if (netdev_offload_xstats_enabled(dev, NETDEV_OFFLOAD_XSTATS_TYPE_L3)) {
8267 		mlxsw_sp_rif_push_l3_stats(rif);
8268 		mlxsw_sp_router_port_l3_stats_disable(rif);
8269 		mlxsw_sp_router_hwstats_notify_schedule(dev);
8270 	} else {
8271 		mlxsw_sp_rif_counters_free(rif);
8272 	}
8273 
8274 	for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
8275 		mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
8276 	ops->deconfigure(rif);
8277 	if (fid)
8278 		/* Loopback RIFs are not associated with a FID. */
8279 		mlxsw_sp_fid_put(fid);
8280 	mlxsw_sp->router->rifs[rif->rif_index] = NULL;
8281 	dev_put(dev);
8282 	mlxsw_sp_rif_free(rif);
8283 	mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries);
8284 	vr->rif_count--;
8285 	mlxsw_sp_vr_put(mlxsw_sp, vr);
8286 }
8287 
8288 void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
8289 				 struct net_device *dev)
8290 {
8291 	struct mlxsw_sp_rif *rif;
8292 
8293 	mutex_lock(&mlxsw_sp->router->lock);
8294 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
8295 	if (!rif)
8296 		goto out;
8297 	mlxsw_sp_rif_destroy(rif);
8298 out:
8299 	mutex_unlock(&mlxsw_sp->router->lock);
8300 }
8301 
8302 static void
8303 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
8304 				 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
8305 {
8306 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
8307 
8308 	params->vid = mlxsw_sp_port_vlan->vid;
8309 	params->lag = mlxsw_sp_port->lagged;
8310 	if (params->lag)
8311 		params->lag_id = mlxsw_sp_port->lag_id;
8312 	else
8313 		params->system_port = mlxsw_sp_port->local_port;
8314 }
8315 
8316 static struct mlxsw_sp_rif_subport *
8317 mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
8318 {
8319 	return container_of(rif, struct mlxsw_sp_rif_subport, common);
8320 }
8321 
8322 static struct mlxsw_sp_rif *
8323 mlxsw_sp_rif_subport_get(struct mlxsw_sp *mlxsw_sp,
8324 			 const struct mlxsw_sp_rif_params *params,
8325 			 struct netlink_ext_ack *extack)
8326 {
8327 	struct mlxsw_sp_rif_subport *rif_subport;
8328 	struct mlxsw_sp_rif *rif;
8329 
8330 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, params->dev);
8331 	if (!rif)
8332 		return mlxsw_sp_rif_create(mlxsw_sp, params, extack);
8333 
8334 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
8335 	refcount_inc(&rif_subport->ref_count);
8336 	return rif;
8337 }
8338 
8339 static void mlxsw_sp_rif_subport_put(struct mlxsw_sp_rif *rif)
8340 {
8341 	struct mlxsw_sp_rif_subport *rif_subport;
8342 
8343 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
8344 	if (!refcount_dec_and_test(&rif_subport->ref_count))
8345 		return;
8346 
8347 	mlxsw_sp_rif_destroy(rif);
8348 }
8349 
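/* The device supports a limited number of RIF MAC profiles, each
 * describing a MAC prefix. RIFs whose MAC addresses differ only in the
 * bits covered by mlxsw_sp->mac_mask share a profile, which is reference
 * counted and tracked in an IDR.
 */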
8350 static int mlxsw_sp_rif_mac_profile_index_alloc(struct mlxsw_sp *mlxsw_sp,
8351 						struct mlxsw_sp_rif_mac_profile *profile,
8352 						struct netlink_ext_ack *extack)
8353 {
8354 	u8 max_rif_mac_profiles = mlxsw_sp->router->max_rif_mac_profile;
8355 	struct mlxsw_sp_router *router = mlxsw_sp->router;
8356 	int id;
8357 
8358 	id = idr_alloc(&router->rif_mac_profiles_idr, profile, 0,
8359 		       max_rif_mac_profiles, GFP_KERNEL);
8360 
8361 	if (id >= 0) {
8362 		profile->id = id;
8363 		return 0;
8364 	}
8365 
8366 	if (id == -ENOSPC)
8367 		NL_SET_ERR_MSG_MOD(extack,
8368 				   "Exceeded number of supported router interface MAC profiles");
8369 
8370 	return id;
8371 }
8372 
8373 static struct mlxsw_sp_rif_mac_profile *
8374 mlxsw_sp_rif_mac_profile_index_free(struct mlxsw_sp *mlxsw_sp, u8 mac_profile)
8375 {
8376 	struct mlxsw_sp_rif_mac_profile *profile;
8377 
8378 	profile = idr_remove(&mlxsw_sp->router->rif_mac_profiles_idr,
8379 			     mac_profile);
8380 	WARN_ON(!profile);
8381 	return profile;
8382 }
8383 
8384 static struct mlxsw_sp_rif_mac_profile *
8385 mlxsw_sp_rif_mac_profile_alloc(const char *mac)
8386 {
8387 	struct mlxsw_sp_rif_mac_profile *profile;
8388 
8389 	profile = kzalloc(sizeof(*profile), GFP_KERNEL);
8390 	if (!profile)
8391 		return NULL;
8392 
8393 	ether_addr_copy(profile->mac_prefix, mac);
8394 	refcount_set(&profile->ref_count, 1);
8395 	return profile;
8396 }
8397 
8398 static struct mlxsw_sp_rif_mac_profile *
8399 mlxsw_sp_rif_mac_profile_find(const struct mlxsw_sp *mlxsw_sp, const char *mac)
8400 {
8401 	struct mlxsw_sp_router *router = mlxsw_sp->router;
8402 	struct mlxsw_sp_rif_mac_profile *profile;
8403 	int id;
8404 
8405 	idr_for_each_entry(&router->rif_mac_profiles_idr, profile, id) {
8406 		if (ether_addr_equal_masked(profile->mac_prefix, mac,
8407 					    mlxsw_sp->mac_mask))
8408 			return profile;
8409 	}
8410 
8411 	return NULL;
8412 }
8413 
8414 static u64 mlxsw_sp_rif_mac_profiles_occ_get(void *priv)
8415 {
8416 	const struct mlxsw_sp *mlxsw_sp = priv;
8417 
8418 	return atomic_read(&mlxsw_sp->router->rif_mac_profiles_count);
8419 }
8420 
8421 static u64 mlxsw_sp_rifs_occ_get(void *priv)
8422 {
8423 	const struct mlxsw_sp *mlxsw_sp = priv;
8424 
8425 	return atomic_read(&mlxsw_sp->router->rifs_count);
8426 }
8427 
8428 static struct mlxsw_sp_rif_mac_profile *
8429 mlxsw_sp_rif_mac_profile_create(struct mlxsw_sp *mlxsw_sp, const char *mac,
8430 				struct netlink_ext_ack *extack)
8431 {
8432 	struct mlxsw_sp_rif_mac_profile *profile;
8433 	int err;
8434 
8435 	profile = mlxsw_sp_rif_mac_profile_alloc(mac);
8436 	if (!profile)
8437 		return ERR_PTR(-ENOMEM);
8438 
8439 	err = mlxsw_sp_rif_mac_profile_index_alloc(mlxsw_sp, profile, extack);
8440 	if (err)
8441 		goto profile_index_alloc_err;
8442 
8443 	atomic_inc(&mlxsw_sp->router->rif_mac_profiles_count);
8444 	return profile;
8445 
8446 profile_index_alloc_err:
8447 	kfree(profile);
8448 	return ERR_PTR(err);
8449 }
8450 
8451 static void mlxsw_sp_rif_mac_profile_destroy(struct mlxsw_sp *mlxsw_sp,
8452 					     u8 mac_profile)
8453 {
8454 	struct mlxsw_sp_rif_mac_profile *profile;
8455 
8456 	atomic_dec(&mlxsw_sp->router->rif_mac_profiles_count);
8457 	profile = mlxsw_sp_rif_mac_profile_index_free(mlxsw_sp, mac_profile);
8458 	kfree(profile);
8459 }
8460 
8461 static int mlxsw_sp_rif_mac_profile_get(struct mlxsw_sp *mlxsw_sp,
8462 					const char *mac, u8 *p_mac_profile,
8463 					struct netlink_ext_ack *extack)
8464 {
8465 	struct mlxsw_sp_rif_mac_profile *profile;
8466 
8467 	profile = mlxsw_sp_rif_mac_profile_find(mlxsw_sp, mac);
8468 	if (profile) {
8469 		refcount_inc(&profile->ref_count);
8470 		goto out;
8471 	}
8472 
8473 	profile = mlxsw_sp_rif_mac_profile_create(mlxsw_sp, mac, extack);
8474 	if (IS_ERR(profile))
8475 		return PTR_ERR(profile);
8476 
8477 out:
8478 	*p_mac_profile = profile->id;
8479 	return 0;
8480 }
8481 
8482 static void mlxsw_sp_rif_mac_profile_put(struct mlxsw_sp *mlxsw_sp,
8483 					 u8 mac_profile)
8484 {
8485 	struct mlxsw_sp_rif_mac_profile *profile;
8486 
8487 	profile = idr_find(&mlxsw_sp->router->rif_mac_profiles_idr,
8488 			   mac_profile);
8489 	if (WARN_ON(!profile))
8490 		return;
8491 
8492 	if (!refcount_dec_and_test(&profile->ref_count))
8493 		return;
8494 
8495 	mlxsw_sp_rif_mac_profile_destroy(mlxsw_sp, mac_profile);
8496 }
8497 
8498 static bool mlxsw_sp_rif_mac_profile_is_shared(const struct mlxsw_sp_rif *rif)
8499 {
8500 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8501 	struct mlxsw_sp_rif_mac_profile *profile;
8502 
8503 	profile = idr_find(&mlxsw_sp->router->rif_mac_profiles_idr,
8504 			   rif->mac_profile_id);
8505 	if (WARN_ON(!profile))
8506 		return false;
8507 
8508 	return refcount_read(&profile->ref_count) > 1;
8509 }
8510 
8511 static int mlxsw_sp_rif_mac_profile_edit(struct mlxsw_sp_rif *rif,
8512 					 const char *new_mac)
8513 {
8514 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8515 	struct mlxsw_sp_rif_mac_profile *profile;
8516 
8517 	profile = idr_find(&mlxsw_sp->router->rif_mac_profiles_idr,
8518 			   rif->mac_profile_id);
8519 	if (WARN_ON(!profile))
8520 		return -EINVAL;
8521 
8522 	ether_addr_copy(profile->mac_prefix, new_mac);
8523 	return 0;
8524 }
8525 
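/* If the RIF is the only user of its MAC profile and no existing profile
 * matches the new MAC, the profile can simply be edited in place.
 * Otherwise, bind the RIF to a matching (possibly newly created) profile
 * and release the reference on the old one.
 */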
8526 static int
8527 mlxsw_sp_rif_mac_profile_replace(struct mlxsw_sp *mlxsw_sp,
8528 				 struct mlxsw_sp_rif *rif,
8529 				 const char *new_mac,
8530 				 struct netlink_ext_ack *extack)
8531 {
8532 	u8 mac_profile;
8533 	int err;
8534 
8535 	if (!mlxsw_sp_rif_mac_profile_is_shared(rif) &&
8536 	    !mlxsw_sp_rif_mac_profile_find(mlxsw_sp, new_mac))
8537 		return mlxsw_sp_rif_mac_profile_edit(rif, new_mac);
8538 
8539 	err = mlxsw_sp_rif_mac_profile_get(mlxsw_sp, new_mac,
8540 					   &mac_profile, extack);
8541 	if (err)
8542 		return err;
8543 
8544 	mlxsw_sp_rif_mac_profile_put(mlxsw_sp, rif->mac_profile_id);
8545 	rif->mac_profile_id = mac_profile;
8546 	return 0;
8547 }
8548 
8549 static int
8550 __mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
8551 				 struct net_device *l3_dev,
8552 				 struct netlink_ext_ack *extack)
8553 {
8554 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
8555 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
8556 	struct mlxsw_sp_rif_params params = {
8557 		.dev = l3_dev,
8558 	};
8559 	u16 vid = mlxsw_sp_port_vlan->vid;
8560 	struct mlxsw_sp_rif *rif;
8561 	struct mlxsw_sp_fid *fid;
8562 	int err;
8563 
8564 	mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
8565 	rif = mlxsw_sp_rif_subport_get(mlxsw_sp, &params, extack);
8566 	if (IS_ERR(rif))
8567 		return PTR_ERR(rif);
8568 
8569 	/* FID was already created, just take a reference */
8570 	fid = rif->ops->fid_get(rif, extack);
8571 	err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
8572 	if (err)
8573 		goto err_fid_port_vid_map;
8574 
8575 	err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
8576 	if (err)
8577 		goto err_port_vid_learning_set;
8578 
8579 	err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
8580 					BR_STATE_FORWARDING);
8581 	if (err)
8582 		goto err_port_vid_stp_set;
8583 
8584 	mlxsw_sp_port_vlan->fid = fid;
8585 
8586 	return 0;
8587 
8588 err_port_vid_stp_set:
8589 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
8590 err_port_vid_learning_set:
8591 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
8592 err_fid_port_vid_map:
8593 	mlxsw_sp_fid_put(fid);
8594 	mlxsw_sp_rif_subport_put(rif);
8595 	return err;
8596 }
8597 
8598 static void
8599 __mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
8600 {
8601 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
8602 	struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
8603 	struct mlxsw_sp_rif *rif = mlxsw_sp_fid_rif(fid);
8604 	u16 vid = mlxsw_sp_port_vlan->vid;
8605 
8606 	if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
8607 		return;
8608 
8609 	mlxsw_sp_port_vlan->fid = NULL;
8610 	mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
8611 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
8612 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
8613 	mlxsw_sp_fid_put(fid);
8614 	mlxsw_sp_rif_subport_put(rif);
8615 }
8616 
8617 static int
8618 mlxsw_sp_port_vlan_router_join_existing(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
8619 					struct net_device *l3_dev,
8620 					struct netlink_ext_ack *extack)
8621 {
8622 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port_vlan->mlxsw_sp_port->mlxsw_sp;
8623 
8624 	lockdep_assert_held(&mlxsw_sp->router->lock);
8625 
8626 	if (!mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev))
8627 		return 0;
8628 
8629 	return __mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan, l3_dev,
8630 						extack);
8631 }
8632 
8633 void
8634 mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
8635 {
8636 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port_vlan->mlxsw_sp_port->mlxsw_sp;
8637 
8638 	mutex_lock(&mlxsw_sp->router->lock);
8639 	__mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
8640 	mutex_unlock(&mlxsw_sp->router->lock);
8641 }
8642 
8643 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
8644 					     struct net_device *port_dev,
8645 					     unsigned long event, u16 vid,
8646 					     struct netlink_ext_ack *extack)
8647 {
8648 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
8649 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
8650 
8651 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
8652 	if (WARN_ON(!mlxsw_sp_port_vlan))
8653 		return -EINVAL;
8654 
8655 	switch (event) {
8656 	case NETDEV_UP:
8657 		return __mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
8658 							l3_dev, extack);
8659 	case NETDEV_DOWN:
8660 		__mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
8661 		break;
8662 	}
8663 
8664 	return 0;
8665 }
8666 
8667 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
8668 					unsigned long event,
8669 					struct netlink_ext_ack *extack)
8670 {
8671 	if (netif_is_any_bridge_port(port_dev) || netif_is_lag_port(port_dev))
8672 		return 0;
8673 
8674 	return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event,
8675 						 MLXSW_SP_DEFAULT_VID, extack);
8676 }
8677 
8678 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
8679 					 struct net_device *lag_dev,
8680 					 unsigned long event, u16 vid,
8681 					 struct netlink_ext_ack *extack)
8682 {
8683 	struct net_device *port_dev;
8684 	struct list_head *iter;
8685 	int err;
8686 
8687 	netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
8688 		if (mlxsw_sp_port_dev_check(port_dev)) {
8689 			err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
8690 								port_dev,
8691 								event, vid,
8692 								extack);
8693 			if (err)
8694 				return err;
8695 		}
8696 	}
8697 
8698 	return 0;
8699 }
8700 
8701 static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
8702 				       unsigned long event,
8703 				       struct netlink_ext_ack *extack)
8704 {
8705 	if (netif_is_bridge_port(lag_dev))
8706 		return 0;
8707 
8708 	return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event,
8709 					     MLXSW_SP_DEFAULT_VID, extack);
8710 }
8711 
8712 static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp,
8713 					  struct net_device *l3_dev,
8714 					  unsigned long event,
8715 					  struct netlink_ext_ack *extack)
8716 {
8717 	struct mlxsw_sp_rif_params params = {
8718 		.dev = l3_dev,
8719 	};
8720 	struct mlxsw_sp_rif *rif;
8721 
8722 	switch (event) {
8723 	case NETDEV_UP:
8724 		if (netif_is_bridge_master(l3_dev) && br_vlan_enabled(l3_dev)) {
8725 			u16 proto;
8726 
8727 			br_vlan_get_proto(l3_dev, &proto);
8728 			if (proto == ETH_P_8021AD) {
8729 				NL_SET_ERR_MSG_MOD(extack, "Adding an IP address to 802.1ad bridge is not supported");
8730 				return -EOPNOTSUPP;
8731 			}
8732 		}
8733 		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
8734 		if (IS_ERR(rif))
8735 			return PTR_ERR(rif);
8736 		break;
8737 	case NETDEV_DOWN:
8738 		rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
8739 		mlxsw_sp_rif_destroy(rif);
8740 		break;
8741 	}
8742 
8743 	return 0;
8744 }
8745 
8746 static int mlxsw_sp_inetaddr_vlan_event(struct mlxsw_sp *mlxsw_sp,
8747 					struct net_device *vlan_dev,
8748 					unsigned long event,
8749 					struct netlink_ext_ack *extack)
8750 {
8751 	struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
8752 	u16 vid = vlan_dev_vlan_id(vlan_dev);
8753 
8754 	if (netif_is_bridge_port(vlan_dev))
8755 		return 0;
8756 
8757 	if (mlxsw_sp_port_dev_check(real_dev))
8758 		return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
8759 							 event, vid, extack);
8760 	else if (netif_is_lag_master(real_dev))
8761 		return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
8762 						     vid, extack);
8763 	else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
8764 		return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, vlan_dev, event,
8765 						      extack);
8766 
8767 	return 0;
8768 }
8769 
8770 static bool mlxsw_sp_rif_macvlan_is_vrrp4(const u8 *mac)
8771 {
8772 	u8 vrrp4[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x01, 0x00 };
8773 	u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
8774 
8775 	return ether_addr_equal_masked(mac, vrrp4, mask);
8776 }
8777 
8778 static bool mlxsw_sp_rif_macvlan_is_vrrp6(const u8 *mac)
8779 {
8780 	u8 vrrp6[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x02, 0x00 };
8781 	u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
8782 
8783 	return ether_addr_equal_masked(mac, vrrp6, mask);
8784 }
8785 
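/* A VRRP virtual MAC encodes the virtual router ID in its last octet
 * (00:00:5e:00:01:{VRID} for IPv4 and 00:00:5e:00:02:{VRID} for IPv6, per
 * RFC 5798). Program the VRID into the RIF when a matching macvlan is
 * added and clear it when the macvlan is removed.
 */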
8786 static int mlxsw_sp_rif_vrrp_op(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
8787 				const u8 *mac, bool adding)
8788 {
8789 	char ritr_pl[MLXSW_REG_RITR_LEN];
8790 	u8 vrrp_id = adding ? mac[5] : 0;
8791 	int err;
8792 
8793 	if (!mlxsw_sp_rif_macvlan_is_vrrp4(mac) &&
8794 	    !mlxsw_sp_rif_macvlan_is_vrrp6(mac))
8795 		return 0;
8796 
8797 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
8798 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
8799 	if (err)
8800 		return err;
8801 
8802 	if (mlxsw_sp_rif_macvlan_is_vrrp4(mac))
8803 		mlxsw_reg_ritr_if_vrrp_id_ipv4_set(ritr_pl, vrrp_id);
8804 	else
8805 		mlxsw_reg_ritr_if_vrrp_id_ipv6_set(ritr_pl, vrrp_id);
8806 
8807 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
8808 }
8809 
8810 static int mlxsw_sp_rif_macvlan_add(struct mlxsw_sp *mlxsw_sp,
8811 				    const struct net_device *macvlan_dev,
8812 				    struct netlink_ext_ack *extack)
8813 {
8814 	struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
8815 	struct mlxsw_sp_rif *rif;
8816 	int err;
8817 
8818 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
8819 	if (!rif) {
8820 		NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
8821 		return -EOPNOTSUPP;
8822 	}
8823 
8824 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
8825 				  mlxsw_sp_fid_index(rif->fid), true);
8826 	if (err)
8827 		return err;
8828 
8829 	err = mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index,
8830 				   macvlan_dev->dev_addr, true);
8831 	if (err)
8832 		goto err_rif_vrrp_add;
8833 
8834 	/* Make sure the bridge driver does not have this MAC pointing at
8835 	 * some other port.
8836 	 */
8837 	if (rif->ops->fdb_del)
8838 		rif->ops->fdb_del(rif, macvlan_dev->dev_addr);
8839 
8840 	return 0;
8841 
8842 err_rif_vrrp_add:
8843 	mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
8844 			    mlxsw_sp_fid_index(rif->fid), false);
8845 	return err;
8846 }
8847 
8848 static void __mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
8849 				       const struct net_device *macvlan_dev)
8850 {
8851 	struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
8852 	struct mlxsw_sp_rif *rif;
8853 
8854 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
8855 	/* If we do not have a RIF, then we already took care of
8856 	 * removing the macvlan's MAC during RIF deletion.
8857 	 */
8858 	if (!rif)
8859 		return;
8860 	mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index, macvlan_dev->dev_addr,
8861 			     false);
8862 	mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
8863 			    mlxsw_sp_fid_index(rif->fid), false);
8864 }
8865 
8866 void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
8867 			      const struct net_device *macvlan_dev)
8868 {
8869 	mutex_lock(&mlxsw_sp->router->lock);
8870 	__mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
8871 	mutex_unlock(&mlxsw_sp->router->lock);
8872 }
8873 
8874 static int mlxsw_sp_inetaddr_macvlan_event(struct mlxsw_sp *mlxsw_sp,
8875 					   struct net_device *macvlan_dev,
8876 					   unsigned long event,
8877 					   struct netlink_ext_ack *extack)
8878 {
8879 	switch (event) {
8880 	case NETDEV_UP:
8881 		return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack);
8882 	case NETDEV_DOWN:
8883 		__mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
8884 		break;
8885 	}
8886 
8887 	return 0;
8888 }
8889 
8890 static int __mlxsw_sp_inetaddr_event(struct mlxsw_sp *mlxsw_sp,
8891 				     struct net_device *dev,
8892 				     unsigned long event,
8893 				     struct netlink_ext_ack *extack)
8894 {
8895 	if (mlxsw_sp_port_dev_check(dev))
8896 		return mlxsw_sp_inetaddr_port_event(dev, event, extack);
8897 	else if (netif_is_lag_master(dev))
8898 		return mlxsw_sp_inetaddr_lag_event(dev, event, extack);
8899 	else if (netif_is_bridge_master(dev))
8900 		return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, dev, event,
8901 						      extack);
8902 	else if (is_vlan_dev(dev))
8903 		return mlxsw_sp_inetaddr_vlan_event(mlxsw_sp, dev, event,
8904 						    extack);
8905 	else if (netif_is_macvlan(dev))
8906 		return mlxsw_sp_inetaddr_macvlan_event(mlxsw_sp, dev, event,
8907 						       extack);
8908 	else
8909 		return 0;
8910 }
8911 
8912 static int mlxsw_sp_inetaddr_event(struct notifier_block *nb,
8913 				   unsigned long event, void *ptr)
8914 {
8915 	struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
8916 	struct net_device *dev = ifa->ifa_dev->dev;
8917 	struct mlxsw_sp_router *router;
8918 	struct mlxsw_sp_rif *rif;
8919 	int err = 0;
8920 
8921 	/* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
8922 	if (event == NETDEV_UP)
8923 		return NOTIFY_DONE;
8924 
8925 	router = container_of(nb, struct mlxsw_sp_router, inetaddr_nb);
8926 	mutex_lock(&router->lock);
8927 	rif = mlxsw_sp_rif_find_by_dev(router->mlxsw_sp, dev);
8928 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
8929 		goto out;
8930 
8931 	err = __mlxsw_sp_inetaddr_event(router->mlxsw_sp, dev, event, NULL);
8932 out:
8933 	mutex_unlock(&router->lock);
8934 	return notifier_from_errno(err);
8935 }
8936 
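/* NETDEV_UP is handled in the address validator chain rather than in the
 * inetaddr chain above: the validator runs before the address is actually
 * installed, so a RIF creation failure can veto the address and report
 * the reason through the extack carried in the validator info.
 */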
8937 static int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
8938 					 unsigned long event, void *ptr)
8939 {
8940 	struct in_validator_info *ivi = (struct in_validator_info *) ptr;
8941 	struct net_device *dev = ivi->ivi_dev->dev;
8942 	struct mlxsw_sp *mlxsw_sp;
8943 	struct mlxsw_sp_rif *rif;
8944 	int err = 0;
8945 
8946 	mlxsw_sp = mlxsw_sp_lower_get(dev);
8947 	if (!mlxsw_sp)
8948 		return NOTIFY_DONE;
8949 
8950 	mutex_lock(&mlxsw_sp->router->lock);
8951 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
8952 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
8953 		goto out;
8954 
8955 	err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, ivi->extack);
8956 out:
8957 	mutex_unlock(&mlxsw_sp->router->lock);
8958 	return notifier_from_errno(err);
8959 }
8960 
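/* The inet6addr notifier is called in atomic context, so the event is
 * deferred to a work item. A reference on the netdev is held across the
 * deferral and dropped once the work item has run.
 */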
8961 struct mlxsw_sp_inet6addr_event_work {
8962 	struct work_struct work;
8963 	struct mlxsw_sp *mlxsw_sp;
8964 	struct net_device *dev;
8965 	unsigned long event;
8966 };
8967 
8968 static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
8969 {
8970 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
8971 		container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
8972 	struct mlxsw_sp *mlxsw_sp = inet6addr_work->mlxsw_sp;
8973 	struct net_device *dev = inet6addr_work->dev;
8974 	unsigned long event = inet6addr_work->event;
8975 	struct mlxsw_sp_rif *rif;
8976 
8977 	rtnl_lock();
8978 	mutex_lock(&mlxsw_sp->router->lock);
8979 
8980 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
8981 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
8982 		goto out;
8983 
8984 	__mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, NULL);
8985 out:
8986 	mutex_unlock(&mlxsw_sp->router->lock);
8987 	rtnl_unlock();
8988 	dev_put(dev);
8989 	kfree(inet6addr_work);
8990 }
8991 
8992 /* Called with rcu_read_lock() */
8993 static int mlxsw_sp_inet6addr_event(struct notifier_block *nb,
8994 				    unsigned long event, void *ptr)
8995 {
8996 	struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
8997 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
8998 	struct net_device *dev = if6->idev->dev;
8999 	struct mlxsw_sp_router *router;
9000 
9001 	/* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
9002 	if (event == NETDEV_UP)
9003 		return NOTIFY_DONE;
9004 
9005 	inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
9006 	if (!inet6addr_work)
9007 		return NOTIFY_BAD;
9008 
9009 	router = container_of(nb, struct mlxsw_sp_router, inet6addr_nb);
9010 	INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
9011 	inet6addr_work->mlxsw_sp = router->mlxsw_sp;
9012 	inet6addr_work->dev = dev;
9013 	inet6addr_work->event = event;
9014 	dev_hold(dev);
9015 	mlxsw_core_schedule_work(&inet6addr_work->work);
9016 
9017 	return NOTIFY_DONE;
9018 }
9019 
9020 static int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
9021 					  unsigned long event, void *ptr)
9022 {
9023 	struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
9024 	struct net_device *dev = i6vi->i6vi_dev->dev;
9025 	struct mlxsw_sp *mlxsw_sp;
9026 	struct mlxsw_sp_rif *rif;
9027 	int err = 0;
9028 
9029 	mlxsw_sp = mlxsw_sp_lower_get(dev);
9030 	if (!mlxsw_sp)
9031 		return NOTIFY_DONE;
9032 
9033 	mutex_lock(&mlxsw_sp->router->lock);
9034 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
9035 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
9036 		goto out;
9037 
9038 	err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, i6vi->extack);
9039 out:
9040 	mutex_unlock(&mlxsw_sp->router->lock);
9041 	return notifier_from_errno(err);
9042 }
9043 
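/* Update the MAC, MTU and MAC profile of an existing RIF with a
 * read-modify-write of the RITR register: query the current entry, patch
 * the relevant fields and write it back.
 */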
9044 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
9045 			     const char *mac, int mtu, u8 mac_profile)
9046 {
9047 	char ritr_pl[MLXSW_REG_RITR_LEN];
9048 	int err;
9049 
9050 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
9051 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
9052 	if (err)
9053 		return err;
9054 
9055 	mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
9056 	mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
9057 	mlxsw_reg_ritr_if_mac_profile_id_set(ritr_pl, mac_profile);
9058 	mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
9059 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
9060 }
9061 
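/* Apply a MAC or MTU change on the netdev to its RIF. The old FDB entry
 * is removed first and every step is unwound on failure, so the RIF
 * either fully switches to the new address and MTU or is left unchanged.
 */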
9062 static int
9063 mlxsw_sp_router_port_change_event(struct mlxsw_sp *mlxsw_sp,
9064 				  struct mlxsw_sp_rif *rif,
9065 				  struct netlink_ext_ack *extack)
9066 {
9067 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
9068 	u8 old_mac_profile;
9069 	u16 fid_index;
9070 	int err;
9071 
9072 	fid_index = mlxsw_sp_fid_index(rif->fid);
9073 
9074 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
9075 	if (err)
9076 		return err;
9077 
9078 	old_mac_profile = rif->mac_profile_id;
9079 	err = mlxsw_sp_rif_mac_profile_replace(mlxsw_sp, rif, dev->dev_addr,
9080 					       extack);
9081 	if (err)
9082 		goto err_rif_mac_profile_replace;
9083 
9084 	err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
9085 				dev->mtu, rif->mac_profile_id);
9086 	if (err)
9087 		goto err_rif_edit;
9088 
9089 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
9090 	if (err)
9091 		goto err_rif_fdb_op;
9092 
9093 	if (rif->mtu != dev->mtu) {
9094 		struct mlxsw_sp_vr *vr;
9095 		int i;
9096 
9097 		/* The RIF is relevant only to its mr_table instance, as unlike
9098 		 * unicast routing, in multicast routing a RIF cannot be shared
9099 		 * between several multicast routing tables.
9100 		 */
9101 		vr = &mlxsw_sp->router->vrs[rif->vr_id];
9102 		for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
9103 			mlxsw_sp_mr_rif_mtu_update(vr->mr_table[i],
9104 						   rif, dev->mtu);
9105 	}
9106 
9107 	ether_addr_copy(rif->addr, dev->dev_addr);
9108 	rif->mtu = dev->mtu;
9109 
9110 	netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
9111 
9112 	return 0;
9113 
9114 err_rif_fdb_op:
9115 	mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu,
9116 			  old_mac_profile);
9117 err_rif_edit:
9118 	mlxsw_sp_rif_mac_profile_replace(mlxsw_sp, rif, rif->addr, extack);
9119 err_rif_mac_profile_replace:
9120 	mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
9121 	return err;
9122 }
9123 
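/* Veto a MAC change in advance if it would require allocating a new RIF
 * MAC profile while all profiles are exhausted and the current profile is
 * shared with other RIFs, and therefore cannot simply be edited in place.
 */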
static int
mlxsw_sp_router_port_pre_changeaddr_event(struct mlxsw_sp_rif *rif,
					  struct netdev_notifier_pre_changeaddr_info *info)
9126 {
9127 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9128 	struct mlxsw_sp_rif_mac_profile *profile;
9129 	struct netlink_ext_ack *extack;
9130 	u8 max_rif_mac_profiles;
9131 	u64 occ;
9132 
9133 	extack = netdev_notifier_info_to_extack(&info->info);
9134 
9135 	profile = mlxsw_sp_rif_mac_profile_find(mlxsw_sp, info->dev_addr);
9136 	if (profile)
9137 		return 0;
9138 
9139 	max_rif_mac_profiles = mlxsw_sp->router->max_rif_mac_profile;
9140 	occ = mlxsw_sp_rif_mac_profiles_occ_get(mlxsw_sp);
9141 	if (occ < max_rif_mac_profiles)
9142 		return 0;
9143 
9144 	if (!mlxsw_sp_rif_mac_profile_is_shared(rif))
9145 		return 0;
9146 
9147 	NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interface MAC profiles");
9148 	return -ENOBUFS;
9149 }
9150 
9151 static bool mlxsw_sp_is_offload_xstats_event(unsigned long event)
9152 {
9153 	switch (event) {
9154 	case NETDEV_OFFLOAD_XSTATS_ENABLE:
9155 	case NETDEV_OFFLOAD_XSTATS_DISABLE:
9156 	case NETDEV_OFFLOAD_XSTATS_REPORT_USED:
9157 	case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA:
9158 		return true;
9159 	}
9160 
9161 	return false;
9162 }
9163 
9164 static int
9165 mlxsw_sp_router_port_offload_xstats_cmd(struct mlxsw_sp_rif *rif,
9166 					unsigned long event,
9167 					struct netdev_notifier_offload_xstats_info *info)
9168 {
9169 	switch (info->type) {
9170 	case NETDEV_OFFLOAD_XSTATS_TYPE_L3:
9171 		break;
9172 	default:
9173 		return 0;
9174 	}
9175 
9176 	switch (event) {
9177 	case NETDEV_OFFLOAD_XSTATS_ENABLE:
9178 		return mlxsw_sp_router_port_l3_stats_enable(rif);
9179 	case NETDEV_OFFLOAD_XSTATS_DISABLE:
9180 		mlxsw_sp_router_port_l3_stats_disable(rif);
9181 		return 0;
9182 	case NETDEV_OFFLOAD_XSTATS_REPORT_USED:
9183 		mlxsw_sp_router_port_l3_stats_report_used(rif, info);
9184 		return 0;
9185 	case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA:
9186 		return mlxsw_sp_router_port_l3_stats_report_delta(rif, info);
9187 	}
9188 
9189 	WARN_ON_ONCE(1);
9190 	return 0;
9191 }
9192 
9193 static int
9194 mlxsw_sp_netdevice_offload_xstats_cmd(struct mlxsw_sp *mlxsw_sp,
9195 				      struct net_device *dev,
9196 				      unsigned long event,
9197 				      struct netdev_notifier_offload_xstats_info *info)
9198 {
9199 	struct mlxsw_sp_rif *rif;
9200 
9201 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
9202 	if (!rif)
9203 		return 0;
9204 
9205 	return mlxsw_sp_router_port_offload_xstats_cmd(rif, event, info);
9206 }
9207 
9208 static bool mlxsw_sp_is_router_event(unsigned long event)
9209 {
9210 	switch (event) {
9211 	case NETDEV_PRE_CHANGEADDR:
9212 	case NETDEV_CHANGEADDR:
9213 	case NETDEV_CHANGEMTU:
9214 		return true;
9215 	default:
9216 		return false;
9217 	}
9218 }
9219 
9220 static int mlxsw_sp_netdevice_router_port_event(struct net_device *dev,
9221 						unsigned long event, void *ptr)
9222 {
9223 	struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr);
9224 	struct mlxsw_sp *mlxsw_sp;
9225 	struct mlxsw_sp_rif *rif;
9226 
9227 	mlxsw_sp = mlxsw_sp_lower_get(dev);
9228 	if (!mlxsw_sp)
9229 		return 0;
9230 
9231 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
9232 	if (!rif)
9233 		return 0;
9234 
9235 	switch (event) {
9236 	case NETDEV_CHANGEMTU:
9237 	case NETDEV_CHANGEADDR:
9238 		return mlxsw_sp_router_port_change_event(mlxsw_sp, rif, extack);
9239 	case NETDEV_PRE_CHANGEADDR:
9240 		return mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr);
9241 	default:
9242 		WARN_ON_ONCE(1);
9243 		break;
9244 	}
9245 
9246 	return 0;
9247 }
9248 
9249 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
9250 				  struct net_device *l3_dev,
9251 				  struct netlink_ext_ack *extack)
9252 {
9253 	struct mlxsw_sp_rif *rif;
9254 
9255 	/* If netdev is already associated with a RIF, then we need to
9256 	 * destroy it and create a new one with the new virtual router ID.
9257 	 */
9258 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
9259 	if (rif)
9260 		__mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN,
9261 					  extack);
9262 
9263 	return __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_UP, extack);
9264 }
9265 
9266 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
9267 				    struct net_device *l3_dev)
9268 {
9269 	struct mlxsw_sp_rif *rif;
9270 
9271 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
9272 	if (!rif)
9273 		return;
9274 	__mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, NULL);
9275 }
9276 
9277 static bool mlxsw_sp_is_vrf_event(unsigned long event, void *ptr)
9278 {
9279 	struct netdev_notifier_changeupper_info *info = ptr;
9280 
9281 	if (event != NETDEV_PRECHANGEUPPER && event != NETDEV_CHANGEUPPER)
9282 		return false;
9283 	return netif_is_l3_master(info->upper_dev);
9284 }
9285 
9286 static int
9287 mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
9288 			     struct netdev_notifier_changeupper_info *info)
9289 {
9290 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
9291 	int err = 0;
9292 
9293 	/* We do not create a RIF for a macvlan, but only use it to
9294 	 * direct more MAC addresses to the router.
9295 	 */
9296 	if (!mlxsw_sp || netif_is_macvlan(l3_dev))
9297 		return 0;
9298 
9299 	switch (event) {
9300 	case NETDEV_PRECHANGEUPPER:
9301 		break;
9302 	case NETDEV_CHANGEUPPER:
9303 		if (info->linking) {
9304 			struct netlink_ext_ack *extack;
9305 
9306 			extack = netdev_notifier_info_to_extack(&info->info);
9307 			err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
9308 		} else {
9309 			mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
9310 		}
9311 		break;
9312 	}
9313 
9314 	return err;
9315 }
9316 
9317 static int
9318 mlxsw_sp_port_vid_router_join_existing(struct mlxsw_sp_port *mlxsw_sp_port,
9319 				       u16 vid, struct net_device *dev,
9320 				       struct netlink_ext_ack *extack)
9321 {
9322 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
9323 
9324 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port,
9325 							    vid);
9326 	if (WARN_ON(!mlxsw_sp_port_vlan))
9327 		return -EINVAL;
9328 
9329 	return mlxsw_sp_port_vlan_router_join_existing(mlxsw_sp_port_vlan,
9330 						       dev, extack);
9331 }
9332 
9333 static int __mlxsw_sp_router_port_join_lag(struct mlxsw_sp_port *mlxsw_sp_port,
9334 					   struct net_device *lag_dev,
9335 					   struct netlink_ext_ack *extack)
9336 {
9337 	u16 default_vid = MLXSW_SP_DEFAULT_VID;
9338 
9339 	return mlxsw_sp_port_vid_router_join_existing(mlxsw_sp_port,
9340 						      default_vid, lag_dev,
9341 						      extack);
9342 }
9343 
9344 int mlxsw_sp_router_port_join_lag(struct mlxsw_sp_port *mlxsw_sp_port,
9345 				  struct net_device *lag_dev,
9346 				  struct netlink_ext_ack *extack)
9347 {
9348 	int err;
9349 
9350 	mutex_lock(&mlxsw_sp_port->mlxsw_sp->router->lock);
9351 	err = __mlxsw_sp_router_port_join_lag(mlxsw_sp_port, lag_dev, extack);
9352 	mutex_unlock(&mlxsw_sp_port->mlxsw_sp->router->lock);
9353 
9354 	return err;
9355 }
9356 
9357 static int mlxsw_sp_router_netdevice_event(struct notifier_block *nb,
9358 					   unsigned long event, void *ptr)
9359 {
9360 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
9361 	struct mlxsw_sp_router *router;
9362 	struct mlxsw_sp *mlxsw_sp;
9363 	int err = 0;
9364 
9365 	router = container_of(nb, struct mlxsw_sp_router, netdevice_nb);
9366 	mlxsw_sp = router->mlxsw_sp;
9367 
9368 	mutex_lock(&mlxsw_sp->router->lock);
9369 
9370 	if (mlxsw_sp_is_offload_xstats_event(event))
9371 		err = mlxsw_sp_netdevice_offload_xstats_cmd(mlxsw_sp, dev,
9372 							    event, ptr);
9373 	else if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev))
9374 		err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev,
9375 						       event, ptr);
9376 	else if (mlxsw_sp_netdev_is_ipip_ul(mlxsw_sp, dev))
9377 		err = mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, dev,
9378 						       event, ptr);
9379 	else if (mlxsw_sp_is_router_event(event))
9380 		err = mlxsw_sp_netdevice_router_port_event(dev, event, ptr);
9381 	else if (mlxsw_sp_is_vrf_event(event, ptr))
9382 		err = mlxsw_sp_netdevice_vrf_event(dev, event, ptr);
9383 
9384 	mutex_unlock(&mlxsw_sp->router->lock);
9385 
9386 	return notifier_from_errno(err);
9387 }
9388 
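/* When a RIF is destroyed, the MACs of macvlan uppers that were directed
 * at it must be removed from the router FDB. The walk below visits every
 * macvlan stacked on top of the RIF's netdev.
 */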
9389 static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev,
9390 					struct netdev_nested_priv *priv)
9391 {
9392 	struct mlxsw_sp_rif *rif = (struct mlxsw_sp_rif *)priv->data;
9393 
9394 	if (!netif_is_macvlan(dev))
9395 		return 0;
9396 
9397 	return mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
9398 				   mlxsw_sp_fid_index(rif->fid), false);
9399 }
9400 
9401 static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif)
9402 {
9403 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
9404 	struct netdev_nested_priv priv = {
9405 		.data = (void *)rif,
9406 	};
9407 
9408 	if (!netif_is_macvlan_port(dev))
9409 		return 0;
9410 
	netdev_warn(dev, "Router interface is being deleted, upper macvlans will stop working\n");
9412 	return netdev_walk_all_upper_dev_rcu(dev,
9413 					     __mlxsw_sp_rif_macvlan_flush, &priv);
9414 }
9415 
9416 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
9417 				       const struct mlxsw_sp_rif_params *params)
9418 {
9419 	struct mlxsw_sp_rif_subport *rif_subport;
9420 
9421 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
9422 	refcount_set(&rif_subport->ref_count, 1);
9423 	rif_subport->vid = params->vid;
9424 	rif_subport->lag = params->lag;
9425 	if (params->lag)
9426 		rif_subport->lag_id = params->lag_id;
9427 	else
9428 		rif_subport->system_port = params->system_port;
9429 }
9430 
9431 static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
9432 {
9433 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
9434 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9435 	struct mlxsw_sp_rif_subport *rif_subport;
9436 	char ritr_pl[MLXSW_REG_RITR_LEN];
9437 	u16 efid;
9438 
9439 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
9440 	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
9441 			    rif->rif_index, rif->vr_id, dev->mtu);
9442 	mlxsw_reg_ritr_mac_pack(ritr_pl, dev->dev_addr);
9443 	mlxsw_reg_ritr_if_mac_profile_id_set(ritr_pl, rif->mac_profile_id);
9444 	efid = mlxsw_sp_fid_index(rif->fid);
9445 	mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
9446 				  rif_subport->lag ? rif_subport->lag_id :
9447 						     rif_subport->system_port,
9448 				  efid, 0);
9449 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
9450 }
9451 
9452 static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif,
9453 					  struct netlink_ext_ack *extack)
9454 {
9455 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
9456 	u8 mac_profile;
9457 	int err;
9458 
9459 	err = mlxsw_sp_rif_mac_profile_get(rif->mlxsw_sp, rif->addr,
9460 					   &mac_profile, extack);
9461 	if (err)
9462 		return err;
9463 	rif->mac_profile_id = mac_profile;
9464 
9465 	err = mlxsw_sp_rif_subport_op(rif, true);
9466 	if (err)
9467 		goto err_rif_subport_op;
9468 
9469 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
9470 				  mlxsw_sp_fid_index(rif->fid), true);
9471 	if (err)
9472 		goto err_rif_fdb_op;
9473 
9474 	err = mlxsw_sp_fid_rif_set(rif->fid, rif);
9475 	if (err)
9476 		goto err_fid_rif_set;
9477 
9478 	return 0;
9479 
9480 err_fid_rif_set:
9481 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
9482 			    mlxsw_sp_fid_index(rif->fid), false);
9483 err_rif_fdb_op:
9484 	mlxsw_sp_rif_subport_op(rif, false);
9485 err_rif_subport_op:
9486 	mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, mac_profile);
9487 	return err;
9488 }
9489 
9490 static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
9491 {
9492 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
9493 	struct mlxsw_sp_fid *fid = rif->fid;
9494 
9495 	mlxsw_sp_fid_rif_unset(fid);
9496 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
9497 			    mlxsw_sp_fid_index(fid), false);
9498 	mlxsw_sp_rif_macvlan_flush(rif);
9499 	mlxsw_sp_rif_subport_op(rif, false);
9500 	mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, rif->mac_profile_id);
9501 }
9502 
9503 static struct mlxsw_sp_fid *
9504 mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif,
9505 			     struct netlink_ext_ack *extack)
9506 {
9507 	return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
9508 }
9509 
9510 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
9511 	.type			= MLXSW_SP_RIF_TYPE_SUBPORT,
9512 	.rif_size		= sizeof(struct mlxsw_sp_rif_subport),
9513 	.setup			= mlxsw_sp_rif_subport_setup,
9514 	.configure		= mlxsw_sp_rif_subport_configure,
9515 	.deconfigure		= mlxsw_sp_rif_subport_deconfigure,
9516 	.fid_get		= mlxsw_sp_rif_subport_fid_get,
9517 };
9518 
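/* A FID RIF routes on behalf of an entire filtering identifier, as used
 * for VLAN-unaware (802.1D) bridges; the RITR entry is keyed by the FID
 * index rather than by a specific port or VLAN.
 */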
9519 static int mlxsw_sp_rif_fid_op(struct mlxsw_sp_rif *rif, u16 fid, bool enable)
9520 {
9521 	enum mlxsw_reg_ritr_if_type type = MLXSW_REG_RITR_FID_IF;
9522 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
9523 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9524 	char ritr_pl[MLXSW_REG_RITR_LEN];
9525 
9526 	mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
9527 			    dev->mtu);
9528 	mlxsw_reg_ritr_mac_pack(ritr_pl, dev->dev_addr);
9529 	mlxsw_reg_ritr_if_mac_profile_id_set(ritr_pl, rif->mac_profile_id);
9530 	mlxsw_reg_ritr_fid_if_fid_set(ritr_pl, fid);
9531 
9532 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
9533 }
9534 
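/* The router port is a virtual port one past the device's maximum port
 * number. Adding it to a FID's MC and BC flood tables ensures that
 * flooded traffic is also delivered to the router.
 */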
9535 u16 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
9536 {
9537 	return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
9538 }
9539 
9540 static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif,
9541 				      struct netlink_ext_ack *extack)
9542 {
9543 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
9544 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9545 	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
9546 	u8 mac_profile;
9547 	int err;
9548 
9549 	err = mlxsw_sp_rif_mac_profile_get(mlxsw_sp, rif->addr,
9550 					   &mac_profile, extack);
9551 	if (err)
9552 		return err;
9553 	rif->mac_profile_id = mac_profile;
9554 
9555 	err = mlxsw_sp_rif_fid_op(rif, fid_index, true);
9556 	if (err)
9557 		goto err_rif_fid_op;
9558 
9559 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
9560 				     mlxsw_sp_router_port(mlxsw_sp), true);
9561 	if (err)
9562 		goto err_fid_mc_flood_set;
9563 
9564 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
9565 				     mlxsw_sp_router_port(mlxsw_sp), true);
9566 	if (err)
9567 		goto err_fid_bc_flood_set;
9568 
9569 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
9570 				  mlxsw_sp_fid_index(rif->fid), true);
9571 	if (err)
9572 		goto err_rif_fdb_op;
9573 
9574 	err = mlxsw_sp_fid_rif_set(rif->fid, rif);
9575 	if (err)
9576 		goto err_fid_rif_set;
9577 
9578 	return 0;
9579 
9580 err_fid_rif_set:
9581 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
9582 			    mlxsw_sp_fid_index(rif->fid), false);
9583 err_rif_fdb_op:
9584 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
9585 			       mlxsw_sp_router_port(mlxsw_sp), false);
9586 err_fid_bc_flood_set:
9587 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
9588 			       mlxsw_sp_router_port(mlxsw_sp), false);
9589 err_fid_mc_flood_set:
9590 	mlxsw_sp_rif_fid_op(rif, fid_index, false);
9591 err_rif_fid_op:
9592 	mlxsw_sp_rif_mac_profile_put(mlxsw_sp, mac_profile);
9593 	return err;
9594 }
9595 
9596 static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
9597 {
9598 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
9599 	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
9600 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9601 	struct mlxsw_sp_fid *fid = rif->fid;
9602 
9603 	mlxsw_sp_fid_rif_unset(fid);
9604 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
9605 			    mlxsw_sp_fid_index(fid), false);
9606 	mlxsw_sp_rif_macvlan_flush(rif);
9607 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
9608 			       mlxsw_sp_router_port(mlxsw_sp), false);
9609 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
9610 			       mlxsw_sp_router_port(mlxsw_sp), false);
9611 	mlxsw_sp_rif_fid_op(rif, fid_index, false);
9612 	mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, rif->mac_profile_id);
9613 }
9614 
9615 static struct mlxsw_sp_fid *
9616 mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif,
9617 			 struct netlink_ext_ack *extack)
9618 {
9619 	int rif_ifindex = mlxsw_sp_rif_dev_ifindex(rif);
9620 
9621 	return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif_ifindex);
9622 }
9623 
9624 static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
9625 {
9626 	struct switchdev_notifier_fdb_info info = {};
9627 	struct net_device *dev;
9628 
9629 	dev = br_fdb_find_port(mlxsw_sp_rif_dev(rif), mac, 0);
9630 	if (!dev)
9631 		return;
9632 
9633 	info.addr = mac;
9634 	info.vid = 0;
9635 	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
9636 				 NULL);
9637 }
9638 
9639 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
9640 	.type			= MLXSW_SP_RIF_TYPE_FID,
9641 	.rif_size		= sizeof(struct mlxsw_sp_rif),
9642 	.configure		= mlxsw_sp_rif_fid_configure,
9643 	.deconfigure		= mlxsw_sp_rif_fid_deconfigure,
9644 	.fid_get		= mlxsw_sp_rif_fid_fid_get,
9645 	.fdb_del		= mlxsw_sp_rif_fid_fdb_del,
9646 };
9647 
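/* VLAN RIFs serve VLAN-aware bridges. The backing 802.1Q FID is derived
 * either from the VLAN device on top of the bridge or, for the bridge
 * device itself, from its PVID.
 */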
9648 static struct mlxsw_sp_fid *
9649 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif,
9650 			  struct netlink_ext_ack *extack)
9651 {
9652 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
9653 	struct net_device *br_dev;
9654 	u16 vid;
9655 	int err;
9656 
9657 	if (is_vlan_dev(dev)) {
9658 		vid = vlan_dev_vlan_id(dev);
9659 		br_dev = vlan_dev_real_dev(dev);
9660 		if (WARN_ON(!netif_is_bridge_master(br_dev)))
9661 			return ERR_PTR(-EINVAL);
9662 	} else {
9663 		err = br_vlan_get_pvid(dev, &vid);
9664 		if (err < 0 || !vid) {
9665 			NL_SET_ERR_MSG_MOD(extack, "Couldn't determine bridge PVID");
9666 			return ERR_PTR(-EINVAL);
9667 		}
9668 	}
9669 
9670 	return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
9671 }
9672 
9673 static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
9674 {
9675 	struct net_device *rif_dev = mlxsw_sp_rif_dev(rif);
9676 	struct switchdev_notifier_fdb_info info = {};
9677 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
9678 	struct net_device *br_dev;
9679 	struct net_device *dev;
9680 
9681 	br_dev = is_vlan_dev(rif_dev) ? vlan_dev_real_dev(rif_dev) : rif_dev;
9682 	dev = br_fdb_find_port(br_dev, mac, vid);
9683 	if (!dev)
9684 		return;
9685 
9686 	info.addr = mac;
9687 	info.vid = vid;
9688 	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
9689 				 NULL);
9690 }
9691 
9692 static int mlxsw_sp_rif_vlan_op(struct mlxsw_sp_rif *rif, u16 vid, u16 efid,
9693 				bool enable)
9694 {
9695 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
9696 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9697 	char ritr_pl[MLXSW_REG_RITR_LEN];
9698 
9699 	mlxsw_reg_ritr_vlan_if_pack(ritr_pl, enable, rif->rif_index, rif->vr_id,
9700 				    dev->mtu, dev->dev_addr,
9701 				    rif->mac_profile_id, vid, efid);
9702 
9703 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
9704 }
9705 
9706 static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif, u16 efid,
9707 				       struct netlink_ext_ack *extack)
9708 {
9709 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
9710 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
9711 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9712 	u8 mac_profile;
9713 	int err;
9714 
9715 	err = mlxsw_sp_rif_mac_profile_get(mlxsw_sp, rif->addr,
9716 					   &mac_profile, extack);
9717 	if (err)
9718 		return err;
9719 	rif->mac_profile_id = mac_profile;
9720 
9721 	err = mlxsw_sp_rif_vlan_op(rif, vid, efid, true);
9722 	if (err)
9723 		goto err_rif_vlan_fid_op;
9724 
9725 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
9726 				     mlxsw_sp_router_port(mlxsw_sp), true);
9727 	if (err)
9728 		goto err_fid_mc_flood_set;
9729 
9730 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
9731 				     mlxsw_sp_router_port(mlxsw_sp), true);
9732 	if (err)
9733 		goto err_fid_bc_flood_set;
9734 
9735 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
9736 				  mlxsw_sp_fid_index(rif->fid), true);
9737 	if (err)
9738 		goto err_rif_fdb_op;
9739 
9740 	err = mlxsw_sp_fid_rif_set(rif->fid, rif);
9741 	if (err)
9742 		goto err_fid_rif_set;
9743 
9744 	return 0;
9745 
9746 err_fid_rif_set:
9747 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
9748 			    mlxsw_sp_fid_index(rif->fid), false);
9749 err_rif_fdb_op:
9750 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
9751 			       mlxsw_sp_router_port(mlxsw_sp), false);
9752 err_fid_bc_flood_set:
9753 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
9754 			       mlxsw_sp_router_port(mlxsw_sp), false);
9755 err_fid_mc_flood_set:
9756 	mlxsw_sp_rif_vlan_op(rif, vid, 0, false);
9757 err_rif_vlan_fid_op:
9758 	mlxsw_sp_rif_mac_profile_put(mlxsw_sp, mac_profile);
9759 	return err;
9760 }
9761 
9762 static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
9763 {
9764 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
9765 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
9766 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9767 
9768 	mlxsw_sp_fid_rif_unset(rif->fid);
9769 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
9770 			    mlxsw_sp_fid_index(rif->fid), false);
9771 	mlxsw_sp_rif_macvlan_flush(rif);
9772 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
9773 			       mlxsw_sp_router_port(mlxsw_sp), false);
9774 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
9775 			       mlxsw_sp_router_port(mlxsw_sp), false);
9776 	mlxsw_sp_rif_vlan_op(rif, vid, 0, false);
9777 	mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, rif->mac_profile_id);
9778 }
9779 
9780 static int mlxsw_sp1_rif_vlan_configure(struct mlxsw_sp_rif *rif,
9781 					struct netlink_ext_ack *extack)
9782 {
9783 	return mlxsw_sp_rif_vlan_configure(rif, 0, extack);
9784 }
9785 
9786 static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_vlan_ops = {
9787 	.type			= MLXSW_SP_RIF_TYPE_VLAN,
9788 	.rif_size		= sizeof(struct mlxsw_sp_rif),
9789 	.configure		= mlxsw_sp1_rif_vlan_configure,
9790 	.deconfigure		= mlxsw_sp_rif_vlan_deconfigure,
9791 	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
9792 	.fdb_del		= mlxsw_sp_rif_vlan_fdb_del,
9793 };
9794 
9795 static int mlxsw_sp2_rif_vlan_configure(struct mlxsw_sp_rif *rif,
9796 					struct netlink_ext_ack *extack)
9797 {
9798 	u16 efid = mlxsw_sp_fid_index(rif->fid);
9799 
9800 	return mlxsw_sp_rif_vlan_configure(rif, efid, extack);
9801 }
9802 
9803 static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_vlan_ops = {
9804 	.type			= MLXSW_SP_RIF_TYPE_VLAN,
9805 	.rif_size		= sizeof(struct mlxsw_sp_rif),
9806 	.configure		= mlxsw_sp2_rif_vlan_configure,
9807 	.deconfigure		= mlxsw_sp_rif_vlan_deconfigure,
9808 	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
9809 	.fdb_del		= mlxsw_sp_rif_vlan_fdb_del,
9810 };
9811 
9812 static struct mlxsw_sp_rif_ipip_lb *
9813 mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
9814 {
9815 	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
9816 }
9817 
9818 static void
9819 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
9820 			   const struct mlxsw_sp_rif_params *params)
9821 {
9822 	struct mlxsw_sp_rif_params_ipip_lb *params_lb;
9823 	struct mlxsw_sp_rif_ipip_lb *rif_lb;
9824 
9825 	params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
9826 				 common);
9827 	rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
9828 	rif_lb->lb_config = params_lb->lb_config;
9829 }
9830 
9831 static int
9832 mlxsw_sp1_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif,
9833 				struct netlink_ext_ack *extack)
9834 {
9835 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
9836 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
9837 	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(dev);
9838 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9839 	struct mlxsw_sp_vr *ul_vr;
9840 	int err;
9841 
9842 	ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, extack);
9843 	if (IS_ERR(ul_vr))
9844 		return PTR_ERR(ul_vr);
9845 
9846 	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, true);
9847 	if (err)
9848 		goto err_loopback_op;
9849 
9850 	lb_rif->ul_vr_id = ul_vr->id;
9851 	lb_rif->ul_rif_id = 0;
9852 	++ul_vr->rif_count;
9853 	return 0;
9854 
9855 err_loopback_op:
9856 	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
9857 	return err;
9858 }
9859 
9860 static void mlxsw_sp1_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
9861 {
9862 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
9863 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9864 	struct mlxsw_sp_vr *ul_vr;
9865 
9866 	ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
9867 	mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, false);
9868 
9869 	--ul_vr->rif_count;
9870 	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
9871 }
9872 
9873 static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_ipip_lb_ops = {
9874 	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
9875 	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
9876 	.setup                  = mlxsw_sp_rif_ipip_lb_setup,
9877 	.configure		= mlxsw_sp1_rif_ipip_lb_configure,
9878 	.deconfigure		= mlxsw_sp1_rif_ipip_lb_deconfigure,
9879 };
9880 
9881 static const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = {
9882 	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
9883 	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp1_rif_vlan_ops,
9884 	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
9885 	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp1_rif_ipip_lb_ops,
9886 };
9887 
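/* On Spectrum-2 and later, IP-in-IP underlay lookups go through a
 * dedicated underlay (UL) RIF that is shared by all tunnels using the
 * same virtual router and is reference counted accordingly. Spectrum-1
 * instead binds the loopback RIF directly to the underlay VR, as in
 * mlxsw_sp1_rif_ipip_lb_configure() above.
 */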
9888 static int
9889 mlxsw_sp_rif_ipip_lb_ul_rif_op(struct mlxsw_sp_rif *ul_rif, bool enable)
9890 {
9891 	struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
9892 	char ritr_pl[MLXSW_REG_RITR_LEN];
9893 
9894 	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
9895 			    ul_rif->rif_index, ul_rif->vr_id, IP_MAX_MTU);
9896 	mlxsw_reg_ritr_loopback_protocol_set(ritr_pl,
9897 					     MLXSW_REG_RITR_LOOPBACK_GENERIC);
9898 
9899 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
9900 }
9901 
9902 static struct mlxsw_sp_rif *
9903 mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
9904 		       struct netlink_ext_ack *extack)
9905 {
9906 	struct mlxsw_sp_rif *ul_rif;
9907 	u8 rif_entries = 1;
9908 	u16 rif_index;
9909 	int err;
9910 
9911 	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index, rif_entries);
9912 	if (err) {
9913 		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
9914 		return ERR_PTR(err);
9915 	}
9916 
9917 	ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id, NULL);
9918 	if (!ul_rif) {
9919 		err = -ENOMEM;
9920 		goto err_rif_alloc;
9921 	}
9922 
9923 	mlxsw_sp->router->rifs[rif_index] = ul_rif;
9924 	ul_rif->mlxsw_sp = mlxsw_sp;
9925 	ul_rif->rif_entries = rif_entries;
9926 	err = mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, true);
9927 	if (err)
9928 		goto ul_rif_op_err;
9929 
9930 	atomic_add(rif_entries, &mlxsw_sp->router->rifs_count);
9931 	return ul_rif;
9932 
9933 ul_rif_op_err:
9934 	mlxsw_sp->router->rifs[rif_index] = NULL;
9935 	mlxsw_sp_rif_free(ul_rif);
9936 err_rif_alloc:
9937 	mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries);
9938 	return ERR_PTR(err);
9939 }
9940 
9941 static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif)
9942 {
9943 	struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
9944 	u8 rif_entries = ul_rif->rif_entries;
9945 	u16 rif_index = ul_rif->rif_index;
9946 
9947 	atomic_sub(rif_entries, &mlxsw_sp->router->rifs_count);
9948 	mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, false);
9949 	mlxsw_sp->router->rifs[ul_rif->rif_index] = NULL;
9950 	mlxsw_sp_rif_free(ul_rif);
9951 	mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries);
9952 }
9953 
9954 static struct mlxsw_sp_rif *
9955 mlxsw_sp_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
9956 		    struct netlink_ext_ack *extack)
9957 {
9958 	struct mlxsw_sp_vr *vr;
9959 	int err;
9960 
9961 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, extack);
9962 	if (IS_ERR(vr))
9963 		return ERR_CAST(vr);
9964 
9965 	if (refcount_inc_not_zero(&vr->ul_rif_refcnt))
9966 		return vr->ul_rif;
9967 
9968 	vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, extack);
9969 	if (IS_ERR(vr->ul_rif)) {
9970 		err = PTR_ERR(vr->ul_rif);
9971 		goto err_ul_rif_create;
9972 	}
9973 
9974 	vr->rif_count++;
9975 	refcount_set(&vr->ul_rif_refcnt, 1);
9976 
9977 	return vr->ul_rif;
9978 
9979 err_ul_rif_create:
9980 	mlxsw_sp_vr_put(mlxsw_sp, vr);
9981 	return ERR_PTR(err);
9982 }
9983 
9984 static void mlxsw_sp_ul_rif_put(struct mlxsw_sp_rif *ul_rif)
9985 {
9986 	struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
9987 	struct mlxsw_sp_vr *vr;
9988 
9989 	vr = &mlxsw_sp->router->vrs[ul_rif->vr_id];
9990 
9991 	if (!refcount_dec_and_test(&vr->ul_rif_refcnt))
9992 		return;
9993 
9994 	vr->rif_count--;
9995 	mlxsw_sp_ul_rif_destroy(ul_rif);
9996 	mlxsw_sp_vr_put(mlxsw_sp, vr);
9997 }
9998 
9999 int mlxsw_sp_router_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
10000 			       u16 *ul_rif_index)
10001 {
10002 	struct mlxsw_sp_rif *ul_rif;
10003 	int err = 0;
10004 
10005 	mutex_lock(&mlxsw_sp->router->lock);
10006 	ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL);
10007 	if (IS_ERR(ul_rif)) {
10008 		err = PTR_ERR(ul_rif);
10009 		goto out;
10010 	}
10011 	*ul_rif_index = ul_rif->rif_index;
10012 out:
10013 	mutex_unlock(&mlxsw_sp->router->lock);
10014 	return err;
10015 }
10016 
10017 void mlxsw_sp_router_ul_rif_put(struct mlxsw_sp *mlxsw_sp, u16 ul_rif_index)
10018 {
10019 	struct mlxsw_sp_rif *ul_rif;
10020 
10021 	mutex_lock(&mlxsw_sp->router->lock);
10022 	ul_rif = mlxsw_sp->router->rifs[ul_rif_index];
10023 	if (WARN_ON(!ul_rif))
10024 		goto out;
10025 
10026 	mlxsw_sp_ul_rif_put(ul_rif);
10027 out:
10028 	mutex_unlock(&mlxsw_sp->router->lock);
10029 }
10030 
10031 static int
10032 mlxsw_sp2_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif,
10033 				struct netlink_ext_ack *extack)
10034 {
10035 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
10036 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
10037 	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(dev);
10038 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10039 	struct mlxsw_sp_rif *ul_rif;
10040 	int err;
10041 
10042 	ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, extack);
10043 	if (IS_ERR(ul_rif))
10044 		return PTR_ERR(ul_rif);
10045 
10046 	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, ul_rif->rif_index, true);
10047 	if (err)
10048 		goto err_loopback_op;
10049 
10050 	lb_rif->ul_vr_id = 0;
10051 	lb_rif->ul_rif_id = ul_rif->rif_index;
10052 
10053 	return 0;
10054 
10055 err_loopback_op:
10056 	mlxsw_sp_ul_rif_put(ul_rif);
10057 	return err;
10058 }
10059 
10060 static void mlxsw_sp2_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
10061 {
10062 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
10063 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10064 	struct mlxsw_sp_rif *ul_rif;
10065 
10066 	ul_rif = mlxsw_sp_rif_by_index(mlxsw_sp, lb_rif->ul_rif_id);
10067 	mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, lb_rif->ul_rif_id, false);
10068 	mlxsw_sp_ul_rif_put(ul_rif);
10069 }
10070 
10071 static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_ipip_lb_ops = {
10072 	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
10073 	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
10074 	.setup                  = mlxsw_sp_rif_ipip_lb_setup,
10075 	.configure		= mlxsw_sp2_rif_ipip_lb_configure,
10076 	.deconfigure		= mlxsw_sp2_rif_ipip_lb_deconfigure,
10077 };
10078 
10079 static const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = {
10080 	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
10081 	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp2_rif_vlan_ops,
10082 	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
10083 	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp2_rif_ipip_lb_ops,
10084 };
10085 
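/* RIF indices are handed out by a genalloc pool. The order-aligned
 * first-fit algorithm keeps multi-entry allocations naturally aligned to
 * their size, which RIF types consuming two entries rely on, and the
 * non-zero base offset exists because gen_pool_alloc() uses 0 as its
 * failure value.
 */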
10086 static int mlxsw_sp_rifs_table_init(struct mlxsw_sp *mlxsw_sp)
10087 {
10088 	struct gen_pool *rifs_table;
10089 	int err;
10090 
10091 	rifs_table = gen_pool_create(0, -1);
10092 	if (!rifs_table)
10093 		return -ENOMEM;
10094 
10095 	gen_pool_set_algo(rifs_table, gen_pool_first_fit_order_align,
10096 			  NULL);
10097 
10098 	err = gen_pool_add(rifs_table, MLXSW_SP_ROUTER_GENALLOC_OFFSET,
10099 			   MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS), -1);
10100 	if (err)
10101 		goto err_gen_pool_add;
10102 
10103 	mlxsw_sp->router->rifs_table = rifs_table;
10104 
10105 	return 0;
10106 
10107 err_gen_pool_add:
10108 	gen_pool_destroy(rifs_table);
10109 	return err;
10110 }
10111 
10112 static void mlxsw_sp_rifs_table_fini(struct mlxsw_sp *mlxsw_sp)
10113 {
10114 	gen_pool_destroy(mlxsw_sp->router->rifs_table);
10115 }
10116 
10117 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
10118 {
10119 	u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
10120 	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
10121 	struct mlxsw_core *core = mlxsw_sp->core;
10122 	int err;
10123 
10124 	if (!MLXSW_CORE_RES_VALID(core, MAX_RIF_MAC_PROFILES))
10125 		return -EIO;
10126 	mlxsw_sp->router->max_rif_mac_profile =
10127 		MLXSW_CORE_RES_GET(core, MAX_RIF_MAC_PROFILES);
10128 
10129 	mlxsw_sp->router->rifs = kcalloc(max_rifs,
10130 					 sizeof(struct mlxsw_sp_rif *),
10131 					 GFP_KERNEL);
10132 	if (!mlxsw_sp->router->rifs)
10133 		return -ENOMEM;
10134 
10135 	err = mlxsw_sp_rifs_table_init(mlxsw_sp);
10136 	if (err)
10137 		goto err_rifs_table_init;
10138 
10139 	idr_init(&mlxsw_sp->router->rif_mac_profiles_idr);
10140 	atomic_set(&mlxsw_sp->router->rif_mac_profiles_count, 0);
10141 	atomic_set(&mlxsw_sp->router->rifs_count, 0);
10142 	devl_resource_occ_get_register(devlink,
10143 				       MLXSW_SP_RESOURCE_RIF_MAC_PROFILES,
10144 				       mlxsw_sp_rif_mac_profiles_occ_get,
10145 				       mlxsw_sp);
10146 	devl_resource_occ_get_register(devlink,
10147 				       MLXSW_SP_RESOURCE_RIFS,
10148 				       mlxsw_sp_rifs_occ_get,
10149 				       mlxsw_sp);
10150 
10151 	return 0;
10152 
10153 err_rifs_table_init:
10154 	kfree(mlxsw_sp->router->rifs);
10155 	return err;
10156 }
10157 
10158 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
10159 {
10160 	int max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
10161 	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
10162 	int i;
10163 
10164 	WARN_ON_ONCE(atomic_read(&mlxsw_sp->router->rifs_count));
10165 	for (i = 0; i < max_rifs; i++)
10166 		WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
10167 
10168 	devl_resource_occ_get_unregister(devlink, MLXSW_SP_RESOURCE_RIFS);
10169 	devl_resource_occ_get_unregister(devlink,
10170 					 MLXSW_SP_RESOURCE_RIF_MAC_PROFILES);
10171 	WARN_ON(!idr_is_empty(&mlxsw_sp->router->rif_mac_profiles_idr));
10172 	idr_destroy(&mlxsw_sp->router->rif_mac_profiles_idr);
10173 	mlxsw_sp_rifs_table_fini(mlxsw_sp);
10174 	kfree(mlxsw_sp->router->rifs);
10175 }
10176 
10177 static int
10178 mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
10179 {
10180 	char tigcr_pl[MLXSW_REG_TIGCR_LEN];
10181 
10182 	mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
10183 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
10184 }
10185 
10186 static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
10187 {
10188 	int err;
10189 
10190 	INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
10191 
10192 	err = mlxsw_sp_ipip_ecn_encap_init(mlxsw_sp);
10193 	if (err)
10194 		return err;
10195 	err = mlxsw_sp_ipip_ecn_decap_init(mlxsw_sp);
10196 	if (err)
10197 		return err;
10198 
10199 	return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
10200 }
10201 
10202 static int mlxsw_sp1_ipips_init(struct mlxsw_sp *mlxsw_sp)
10203 {
10204 	mlxsw_sp->router->ipip_ops_arr = mlxsw_sp1_ipip_ops_arr;
10205 	return mlxsw_sp_ipips_init(mlxsw_sp);
10206 }
10207 
10208 static int mlxsw_sp2_ipips_init(struct mlxsw_sp *mlxsw_sp)
10209 {
10210 	mlxsw_sp->router->ipip_ops_arr = mlxsw_sp2_ipip_ops_arr;
10211 	return mlxsw_sp_ipips_init(mlxsw_sp);
10212 }
10213 
10214 static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
10215 {
10216 	WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
10217 }
10218 
10219 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
10220 {
10221 	struct mlxsw_sp_router *router;
10222 
10223 	/* Flush pending FIB notifications and then flush the device's
10224 	 * table before requesting another dump. The FIB notification
10225 	 * block is unregistered, so no need to take RTNL.
10226 	 */
10227 	mlxsw_core_flush_owq();
10228 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
10229 	mlxsw_sp_router_fib_flush(router->mlxsw_sp);
10230 }
10231 
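/* ECMP hash configuration. The kernel's fib_multipath_hash_policy sysctl
 * selects which packet headers feed the multipath hash; the code below
 * translates each policy into header/field enables of the RECR2 register:
 *
 *   0 - outer L3 addresses
 *   1 - outer L3 addresses and L4 ports
 *   2 - outer L3, plus inner L3 for encapsulated packets
 *   3 - custom set of fields taken from fib_multipath_hash_fields
 */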
10232 #ifdef CONFIG_IP_ROUTE_MULTIPATH
10233 struct mlxsw_sp_mp_hash_config {
10234 	DECLARE_BITMAP(headers, __MLXSW_REG_RECR2_HEADER_CNT);
10235 	DECLARE_BITMAP(fields, __MLXSW_REG_RECR2_FIELD_CNT);
10236 	DECLARE_BITMAP(inner_headers, __MLXSW_REG_RECR2_HEADER_CNT);
10237 	DECLARE_BITMAP(inner_fields, __MLXSW_REG_RECR2_INNER_FIELD_CNT);
10238 	bool inc_parsing_depth;
10239 };
10240 
10241 #define MLXSW_SP_MP_HASH_HEADER_SET(_headers, _header) \
10242 	bitmap_set(_headers, MLXSW_REG_RECR2_##_header, 1)
10243 
10244 #define MLXSW_SP_MP_HASH_FIELD_SET(_fields, _field) \
10245 	bitmap_set(_fields, MLXSW_REG_RECR2_##_field, 1)
10246 
10247 #define MLXSW_SP_MP_HASH_FIELD_RANGE_SET(_fields, _field, _nr) \
10248 	bitmap_set(_fields, MLXSW_REG_RECR2_##_field, _nr)
10249 
10250 static void mlxsw_sp_mp_hash_inner_l3(struct mlxsw_sp_mp_hash_config *config)
10251 {
10252 	unsigned long *inner_headers = config->inner_headers;
10253 	unsigned long *inner_fields = config->inner_fields;
10254 
10255 	/* IPv4 inner */
10256 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_NOT_TCP_NOT_UDP);
10257 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_TCP_UDP);
10258 	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_SIP0, 4);
10259 	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_DIP0, 4);
10260 	/* IPv6 inner */
10261 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_NOT_TCP_NOT_UDP);
10262 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_TCP_UDP);
10263 	MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_SIP0_7);
10264 	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_SIP8, 8);
10265 	MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_DIP0_7);
10266 	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_DIP8, 8);
10267 	MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_NEXT_HEADER);
10268 	MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_FLOW_LABEL);
10269 }
10270 
10271 static void mlxsw_sp_mp4_hash_outer_addr(struct mlxsw_sp_mp_hash_config *config)
10272 {
10273 	unsigned long *headers = config->headers;
10274 	unsigned long *fields = config->fields;
10275 
10276 	MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP);
10277 	MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP);
10278 	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_SIP0, 4);
10279 	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_DIP0, 4);
10280 }
10281 
10282 static void
10283 mlxsw_sp_mp_hash_inner_custom(struct mlxsw_sp_mp_hash_config *config,
10284 			      u32 hash_fields)
10285 {
10286 	unsigned long *inner_headers = config->inner_headers;
10287 	unsigned long *inner_fields = config->inner_fields;
10288 
10289 	/* IPv4 Inner */
10290 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_NOT_TCP_NOT_UDP);
10291 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_TCP_UDP);
10292 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP)
10293 		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_SIP0, 4);
10294 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP)
10295 		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_DIP0, 4);
10296 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO)
10297 		MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV4_PROTOCOL);
10298 	/* IPv6 inner */
10299 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_NOT_TCP_NOT_UDP);
10300 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_TCP_UDP);
10301 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) {
10302 		MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_SIP0_7);
10303 		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_SIP8, 8);
10304 	}
10305 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) {
10306 		MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_DIP0_7);
10307 		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_DIP8, 8);
10308 	}
10309 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO)
10310 		MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_NEXT_HEADER);
10311 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL)
10312 		MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_FLOW_LABEL);
10313 	/* L4 inner */
10314 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, TCP_UDP_EN_IPV4);
10315 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, TCP_UDP_EN_IPV6);
10316 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT)
10317 		MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_TCP_UDP_SPORT);
10318 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT)
10319 		MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_TCP_UDP_DPORT);
10320 }
10321 
10322 static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp,
10323 				   struct mlxsw_sp_mp_hash_config *config)
10324 {
10325 	struct net *net = mlxsw_sp_net(mlxsw_sp);
10326 	unsigned long *headers = config->headers;
10327 	unsigned long *fields = config->fields;
10328 	u32 hash_fields;
10329 
10330 	switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) {
10331 	case 0:
10332 		mlxsw_sp_mp4_hash_outer_addr(config);
10333 		break;
10334 	case 1:
10335 		mlxsw_sp_mp4_hash_outer_addr(config);
10336 		MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV4);
10337 		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV4_PROTOCOL);
10338 		MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
10339 		MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
10340 		break;
10341 	case 2:
10342 		/* Outer */
10343 		mlxsw_sp_mp4_hash_outer_addr(config);
10344 		/* Inner */
10345 		mlxsw_sp_mp_hash_inner_l3(config);
10346 		break;
10347 	case 3:
10348 		hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
10349 		/* Outer */
10350 		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP);
10351 		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP);
10352 		MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV4);
10353 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP)
10354 			MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_SIP0, 4);
10355 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP)
10356 			MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_DIP0, 4);
10357 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
10358 			MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV4_PROTOCOL);
10359 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
10360 			MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
10361 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
10362 			MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
10363 		/* Inner */
10364 		mlxsw_sp_mp_hash_inner_custom(config, hash_fields);
10365 		break;
10366 	}
10367 }
10368 
10369 static void mlxsw_sp_mp6_hash_outer_addr(struct mlxsw_sp_mp_hash_config *config)
10370 {
10371 	unsigned long *headers = config->headers;
10372 	unsigned long *fields = config->fields;
10373 
10374 	MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_NOT_TCP_NOT_UDP);
10375 	MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_TCP_UDP);
10376 	MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_SIP0_7);
10377 	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_SIP8, 8);
10378 	MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_DIP0_7);
10379 	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_DIP8, 8);
10380 }
10381 
10382 static void mlxsw_sp_mp6_hash_init(struct mlxsw_sp *mlxsw_sp,
10383 				   struct mlxsw_sp_mp_hash_config *config)
10384 {
10385 	u32 hash_fields = ip6_multipath_hash_fields(mlxsw_sp_net(mlxsw_sp));
10386 	unsigned long *headers = config->headers;
10387 	unsigned long *fields = config->fields;
10388 
10389 	switch (ip6_multipath_hash_policy(mlxsw_sp_net(mlxsw_sp))) {
10390 	case 0:
10391 		mlxsw_sp_mp6_hash_outer_addr(config);
10392 		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
10393 		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL);
10394 		break;
10395 	case 1:
10396 		mlxsw_sp_mp6_hash_outer_addr(config);
10397 		MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV6);
10398 		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
10399 		MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
10400 		MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
10401 		break;
10402 	case 2:
10403 		/* Outer */
10404 		mlxsw_sp_mp6_hash_outer_addr(config);
10405 		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
10406 		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL);
10407 		/* Inner */
10408 		mlxsw_sp_mp_hash_inner_l3(config);
10409 		config->inc_parsing_depth = true;
10410 		break;
10411 	case 3:
10412 		/* Outer */
10413 		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_NOT_TCP_NOT_UDP);
10414 		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_TCP_UDP);
10415 		MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV6);
10416 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) {
10417 			MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_SIP0_7);
10418 			MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_SIP8, 8);
10419 		}
10420 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) {
10421 			MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_DIP0_7);
10422 			MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_DIP8, 8);
10423 		}
10424 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
10425 			MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
10426 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL)
10427 			MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL);
10428 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
10429 			MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
10430 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
10431 			MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
10432 		/* Inner */
10433 		mlxsw_sp_mp_hash_inner_custom(config, hash_fields);
10434 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK)
10435 			config->inc_parsing_depth = true;
10436 		break;
10437 	}
10438 }
10439 
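/* Hashing on inner headers requires the ASIC to parse deeper into the
 * packet than it does by default, so the parsing depth is increased while
 * such a configuration is in effect and restored once it no longer is.
 */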
10440 static int mlxsw_sp_mp_hash_parsing_depth_adjust(struct mlxsw_sp *mlxsw_sp,
10441 						 bool old_inc_parsing_depth,
10442 						 bool new_inc_parsing_depth)
10443 {
10444 	int err;
10445 
10446 	if (!old_inc_parsing_depth && new_inc_parsing_depth) {
10447 		err = mlxsw_sp_parsing_depth_inc(mlxsw_sp);
10448 		if (err)
10449 			return err;
10450 		mlxsw_sp->router->inc_parsing_depth = true;
10451 	} else if (old_inc_parsing_depth && !new_inc_parsing_depth) {
10452 		mlxsw_sp_parsing_depth_dec(mlxsw_sp);
10453 		mlxsw_sp->router->inc_parsing_depth = false;
10454 	}
10455 
10456 	return 0;
10457 }
10458 
10459 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
10460 {
10461 	bool old_inc_parsing_depth, new_inc_parsing_depth;
10462 	struct mlxsw_sp_mp_hash_config config = {};
10463 	char recr2_pl[MLXSW_REG_RECR2_LEN];
10464 	unsigned long bit;
10465 	u32 seed;
10466 	int err;
10467 
10468 	seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0);
10469 	mlxsw_reg_recr2_pack(recr2_pl, seed);
10470 	mlxsw_sp_mp4_hash_init(mlxsw_sp, &config);
10471 	mlxsw_sp_mp6_hash_init(mlxsw_sp, &config);
10472 
10473 	old_inc_parsing_depth = mlxsw_sp->router->inc_parsing_depth;
10474 	new_inc_parsing_depth = config.inc_parsing_depth;
10475 	err = mlxsw_sp_mp_hash_parsing_depth_adjust(mlxsw_sp,
10476 						    old_inc_parsing_depth,
10477 						    new_inc_parsing_depth);
10478 	if (err)
10479 		return err;
10480 
10481 	for_each_set_bit(bit, config.headers, __MLXSW_REG_RECR2_HEADER_CNT)
10482 		mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, bit, 1);
10483 	for_each_set_bit(bit, config.fields, __MLXSW_REG_RECR2_FIELD_CNT)
10484 		mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, bit, 1);
10485 	for_each_set_bit(bit, config.inner_headers, __MLXSW_REG_RECR2_HEADER_CNT)
10486 		mlxsw_reg_recr2_inner_header_enables_set(recr2_pl, bit, 1);
10487 	for_each_set_bit(bit, config.inner_fields, __MLXSW_REG_RECR2_INNER_FIELD_CNT)
10488 		mlxsw_reg_recr2_inner_header_fields_enable_set(recr2_pl, bit, 1);
10489 
10490 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
10491 	if (err)
10492 		goto err_reg_write;
10493 
10494 	return 0;
10495 
10496 err_reg_write:
10497 	mlxsw_sp_mp_hash_parsing_depth_adjust(mlxsw_sp, new_inc_parsing_depth,
10498 					      old_inc_parsing_depth);
10499 	return err;
10500 }
10501 
10502 static void mlxsw_sp_mp_hash_fini(struct mlxsw_sp *mlxsw_sp)
10503 {
10504 	bool old_inc_parsing_depth = mlxsw_sp->router->inc_parsing_depth;
10505 
10506 	mlxsw_sp_mp_hash_parsing_depth_adjust(mlxsw_sp, old_inc_parsing_depth,
10507 					      false);
10508 }
10509 #else
10510 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
10511 {
10512 	return 0;
10513 }
10514 
10515 static void mlxsw_sp_mp_hash_fini(struct mlxsw_sp *mlxsw_sp)
10516 {
10517 }
10518 #endif
10519 
10520 static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
10521 {
10522 	char rdpm_pl[MLXSW_REG_RDPM_LEN];
10523 	unsigned int i;
10524 
10525 	MLXSW_REG_ZERO(rdpm, rdpm_pl);
10526 
	/* The HW determines the switch priority from the DSCP bits, while
	 * the kernel still derives it from the full ToS byte. Since the bit
	 * layouts differ, translate each DSCP value into the ToS the kernel
	 * would observe by skipping the two least-significant ECN bits,
	 * e.g. DSCP 46 (EF) is looked up as ToS 0xb8 (46 << 2).
	 */
	for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
		mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
}

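/* Enable IPv4 and IPv6 routing in hardware via the RGCR register, cap
 * the number of router interfaces at the device limit and mirror the
 * namespace's ip_fwd_update_priority sysctl, which decides whether
 * packet priority is updated during forwarding.
 */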
static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
	struct net *net = mlxsw_sp_net(mlxsw_sp);
	char rgcr_pl[MLXSW_REG_RGCR_LEN];
	u64 max_rifs;
	bool usp;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
		return -EIO;
	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
	usp = READ_ONCE(net->ipv4.sysctl_ip_fwd_update_priority);

	mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
	mlxsw_reg_rgcr_usp_set(rgcr_pl, usp);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
}

static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	char rgcr_pl[MLXSW_REG_RGCR_LEN];

	mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
}

static int mlxsw_sp_lb_rif_init(struct mlxsw_sp *mlxsw_sp)
{
	u16 lb_rif_index;
	int err;

	/* Create a generic loopback RIF associated with the main table
	 * (default VRF). Any table can be used, but the main table exists
	 * anyway, so we do not waste resources.
	 */
	err = mlxsw_sp_router_ul_rif_get(mlxsw_sp, RT_TABLE_MAIN,
					 &lb_rif_index);
	if (err)
		return err;

	mlxsw_sp->router->lb_rif_index = lb_rif_index;

	return 0;
}

static void mlxsw_sp_lb_rif_fini(struct mlxsw_sp *mlxsw_sp)
{
	mlxsw_sp_router_ul_rif_put(mlxsw_sp, mlxsw_sp->router->lb_rif_index);
}

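/* Spectrum-1 and Spectrum-2+ differ in their RIF ops and in the
 * adjacency group sizes the hardware supports, so each generation
 * registers its own tables here.
 */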
static int mlxsw_sp1_router_init(struct mlxsw_sp *mlxsw_sp)
{
	size_t size_ranges_count = ARRAY_SIZE(mlxsw_sp1_adj_grp_size_ranges);

	mlxsw_sp->router->rif_ops_arr = mlxsw_sp1_rif_ops_arr;
	mlxsw_sp->router->adj_grp_size_ranges = mlxsw_sp1_adj_grp_size_ranges;
	mlxsw_sp->router->adj_grp_size_ranges_count = size_ranges_count;

	return 0;
}

const struct mlxsw_sp_router_ops mlxsw_sp1_router_ops = {
	.init = mlxsw_sp1_router_init,
	.ipips_init = mlxsw_sp1_ipips_init,
};

static int mlxsw_sp2_router_init(struct mlxsw_sp *mlxsw_sp)
{
	size_t size_ranges_count = ARRAY_SIZE(mlxsw_sp2_adj_grp_size_ranges);

	mlxsw_sp->router->rif_ops_arr = mlxsw_sp2_rif_ops_arr;
	mlxsw_sp->router->adj_grp_size_ranges = mlxsw_sp2_adj_grp_size_ranges;
	mlxsw_sp->router->adj_grp_size_ranges_count = size_ranges_count;

	return 0;
}

const struct mlxsw_sp_router_ops mlxsw_sp2_router_ops = {
	.init = mlxsw_sp2_router_init,
	.ipips_init = mlxsw_sp2_ipips_init,
};

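/* Bring up the router: allocate the router context, run the per-ASIC
 * init and the routing subsystems (RIFs, nexthops, LPM trees, multicast
 * routing, virtual routers, neighbour handling, multipath hashing,
 * DSCP), and only then register the notifiers, so that no events are
 * delivered before the structures they touch exist. Failures unwind in
 * reverse order of initialization.
 */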
int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
			 struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_router *router;
	int err;

	router = kzalloc(sizeof(*router), GFP_KERNEL);
	if (!router)
		return -ENOMEM;
	mutex_init(&router->lock);
	mlxsw_sp->router = router;
	router->mlxsw_sp = mlxsw_sp;

	err = mlxsw_sp->router_ops->init(mlxsw_sp);
	if (err)
		goto err_router_ops_init;

	INIT_LIST_HEAD(&router->nh_res_grp_list);
	INIT_DELAYED_WORK(&router->nh_grp_activity_dw,
			  mlxsw_sp_nh_grp_activity_work);
	INIT_LIST_HEAD(&router->nexthop_neighs_list);
	err = __mlxsw_sp_router_init(mlxsw_sp);
	if (err)
		goto err_router_init;

	err = mlxsw_sp->router_ops->ipips_init(mlxsw_sp);
	if (err)
		goto err_ipips_init;

	err = mlxsw_sp_rifs_init(mlxsw_sp);
	if (err)
		goto err_rifs_init;

	err = rhashtable_init(&router->nexthop_ht,
			      &mlxsw_sp_nexthop_ht_params);
	if (err)
		goto err_nexthop_ht_init;

	err = rhashtable_init(&router->nexthop_group_ht,
			      &mlxsw_sp_nexthop_group_ht_params);
	if (err)
		goto err_nexthop_group_ht_init;

	INIT_LIST_HEAD(&router->nexthop_list);
	err = mlxsw_sp_lpm_init(mlxsw_sp);
	if (err)
		goto err_lpm_init;

	err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
	if (err)
		goto err_mr_init;

	err = mlxsw_sp_vrs_init(mlxsw_sp);
	if (err)
		goto err_vrs_init;

	err = mlxsw_sp_lb_rif_init(mlxsw_sp);
	if (err)
		goto err_lb_rif_init;

	err = mlxsw_sp_neigh_init(mlxsw_sp);
	if (err)
		goto err_neigh_init;

	err = mlxsw_sp_mp_hash_init(mlxsw_sp);
	if (err)
		goto err_mp_hash_init;

	err = mlxsw_sp_dscp_init(mlxsw_sp);
	if (err)
		goto err_dscp_init;

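	/* Register notifiers only once all data structures are in place;
	 * from here on events may be delivered concurrently.
	 */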
	router->inetaddr_nb.notifier_call = mlxsw_sp_inetaddr_event;
	err = register_inetaddr_notifier(&router->inetaddr_nb);
	if (err)
		goto err_register_inetaddr_notifier;

	router->inet6addr_nb.notifier_call = mlxsw_sp_inet6addr_event;
	err = register_inet6addr_notifier(&router->inet6addr_nb);
	if (err)
		goto err_register_inet6addr_notifier;

	router->inetaddr_valid_nb.notifier_call = mlxsw_sp_inetaddr_valid_event;
	err = register_inetaddr_validator_notifier(&router->inetaddr_valid_nb);
	if (err)
		goto err_register_inetaddr_valid_notifier;

	router->inet6addr_valid_nb.notifier_call = mlxsw_sp_inet6addr_valid_event;
	err = register_inet6addr_validator_notifier(&router->inet6addr_valid_nb);
	if (err)
		goto err_register_inet6addr_valid_notifier;

	router->netevent_nb.notifier_call = mlxsw_sp_router_netevent_event;
	err = register_netevent_notifier(&router->netevent_nb);
	if (err)
		goto err_register_netevent_notifier;

	router->nexthop_nb.notifier_call = mlxsw_sp_nexthop_obj_event;
	err = register_nexthop_notifier(mlxsw_sp_net(mlxsw_sp),
					&router->nexthop_nb, extack);
	if (err)
		goto err_register_nexthop_notifier;

	router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
	err = register_fib_notifier(mlxsw_sp_net(mlxsw_sp), &router->fib_nb,
				    mlxsw_sp_router_fib_dump_flush, extack);
	if (err)
		goto err_register_fib_notifier;

	router->netdevice_nb.notifier_call = mlxsw_sp_router_netdevice_event;
	err = register_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp),
					      &router->netdevice_nb);
	if (err)
		goto err_register_netdev_notifier;

	return 0;

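	/* Error path: unwind in the exact reverse order of initialization. */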
err_register_netdev_notifier:
	unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp), &router->fib_nb);
err_register_fib_notifier:
	unregister_nexthop_notifier(mlxsw_sp_net(mlxsw_sp),
				    &router->nexthop_nb);
err_register_nexthop_notifier:
	unregister_netevent_notifier(&router->netevent_nb);
err_register_netevent_notifier:
	unregister_inet6addr_validator_notifier(&router->inet6addr_valid_nb);
err_register_inet6addr_valid_notifier:
	unregister_inetaddr_validator_notifier(&router->inetaddr_valid_nb);
err_register_inetaddr_valid_notifier:
	unregister_inet6addr_notifier(&router->inet6addr_nb);
err_register_inet6addr_notifier:
	unregister_inetaddr_notifier(&router->inetaddr_nb);
err_register_inetaddr_notifier:
	mlxsw_core_flush_owq();
err_dscp_init:
	mlxsw_sp_mp_hash_fini(mlxsw_sp);
err_mp_hash_init:
	mlxsw_sp_neigh_fini(mlxsw_sp);
err_neigh_init:
	mlxsw_sp_lb_rif_fini(mlxsw_sp);
err_lb_rif_init:
	mlxsw_sp_vrs_fini(mlxsw_sp);
err_vrs_init:
	mlxsw_sp_mr_fini(mlxsw_sp);
err_mr_init:
	mlxsw_sp_lpm_fini(mlxsw_sp);
err_lpm_init:
	rhashtable_destroy(&router->nexthop_group_ht);
err_nexthop_group_ht_init:
	rhashtable_destroy(&router->nexthop_ht);
err_nexthop_ht_init:
	mlxsw_sp_rifs_fini(mlxsw_sp);
err_rifs_init:
	mlxsw_sp_ipips_fini(mlxsw_sp);
err_ipips_init:
	__mlxsw_sp_router_fini(mlxsw_sp);
err_router_init:
	cancel_delayed_work_sync(&router->nh_grp_activity_dw);
err_router_ops_init:
	mutex_destroy(&router->lock);
	kfree(router);
	return err;
}

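/* Teardown mirrors the init error path: unregister all notifiers first
 * so that no new events arrive, flush the ordered workqueue to drain
 * in-flight work items, then release the routing subsystems in reverse
 * order and free the router context.
 */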
void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_router *router = mlxsw_sp->router;

	unregister_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp),
					  &router->netdevice_nb);
	unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp), &router->fib_nb);
	unregister_nexthop_notifier(mlxsw_sp_net(mlxsw_sp),
				    &router->nexthop_nb);
	unregister_netevent_notifier(&router->netevent_nb);
	unregister_inet6addr_validator_notifier(&router->inet6addr_valid_nb);
	unregister_inetaddr_validator_notifier(&router->inetaddr_valid_nb);
	unregister_inet6addr_notifier(&router->inet6addr_nb);
	unregister_inetaddr_notifier(&router->inetaddr_nb);
	mlxsw_core_flush_owq();
	mlxsw_sp_mp_hash_fini(mlxsw_sp);
	mlxsw_sp_neigh_fini(mlxsw_sp);
	mlxsw_sp_lb_rif_fini(mlxsw_sp);
	mlxsw_sp_vrs_fini(mlxsw_sp);
	mlxsw_sp_mr_fini(mlxsw_sp);
	mlxsw_sp_lpm_fini(mlxsw_sp);
	rhashtable_destroy(&router->nexthop_group_ht);
	rhashtable_destroy(&router->nexthop_ht);
	mlxsw_sp_rifs_fini(mlxsw_sp);
	mlxsw_sp_ipips_fini(mlxsw_sp);
	__mlxsw_sp_router_fini(mlxsw_sp);
	cancel_delayed_work_sync(&router->nh_grp_activity_dw);
	mutex_destroy(&router->lock);
	kfree(router);
}