1 /*
2  * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3  * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
4  * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
5  * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
6  * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the names of the copyright holders nor the names of its
17  *    contributors may be used to endorse or promote products derived from
18  *    this software without specific prior written permission.
19  *
20  * Alternatively, this software may be distributed under the terms of the
21  * GNU General Public License ("GPL") version 2 as published by the Free
22  * Software Foundation.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 
37 #include <linux/kernel.h>
38 #include <linux/types.h>
39 #include <linux/rhashtable.h>
40 #include <linux/bitops.h>
41 #include <linux/in6.h>
42 #include <linux/notifier.h>
43 #include <linux/inetdevice.h>
44 #include <linux/netdevice.h>
45 #include <net/netevent.h>
46 #include <net/neighbour.h>
47 #include <net/arp.h>
48 #include <net/ip_fib.h>
49 #include <net/fib_rules.h>
50 #include <net/l3mdev.h>
51 
52 #include "spectrum.h"
53 #include "core.h"
54 #include "reg.h"
55 #include "spectrum_cnt.h"
56 #include "spectrum_dpipe.h"
57 #include "spectrum_router.h"
58 
59 struct mlxsw_sp_rif {
60 	struct list_head nexthop_list;
61 	struct list_head neigh_list;
62 	struct net_device *dev;
63 	struct mlxsw_sp_fid *f;
64 	unsigned char addr[ETH_ALEN];
65 	int mtu;
66 	u16 rif_index;
67 	u16 vr_id;
68 	unsigned int counter_ingress;
69 	bool counter_ingress_valid;
70 	unsigned int counter_egress;
71 	bool counter_egress_valid;
72 };
73 
74 static unsigned int *
75 mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
76 			   enum mlxsw_sp_rif_counter_dir dir)
77 {
78 	switch (dir) {
79 	case MLXSW_SP_RIF_COUNTER_EGRESS:
80 		return &rif->counter_egress;
81 	case MLXSW_SP_RIF_COUNTER_INGRESS:
82 		return &rif->counter_ingress;
83 	}
84 	return NULL;
85 }
86 
87 static bool
88 mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
89 			       enum mlxsw_sp_rif_counter_dir dir)
90 {
91 	switch (dir) {
92 	case MLXSW_SP_RIF_COUNTER_EGRESS:
93 		return rif->counter_egress_valid;
94 	case MLXSW_SP_RIF_COUNTER_INGRESS:
95 		return rif->counter_ingress_valid;
96 	}
97 	return false;
98 }
99 
100 static void
101 mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
102 			       enum mlxsw_sp_rif_counter_dir dir,
103 			       bool valid)
104 {
105 	switch (dir) {
106 	case MLXSW_SP_RIF_COUNTER_EGRESS:
107 		rif->counter_egress_valid = valid;
108 		break;
109 	case MLXSW_SP_RIF_COUNTER_INGRESS:
110 		rif->counter_ingress_valid = valid;
111 		break;
112 	}
113 }
114 
115 static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
116 				     unsigned int counter_index, bool enable,
117 				     enum mlxsw_sp_rif_counter_dir dir)
118 {
119 	char ritr_pl[MLXSW_REG_RITR_LEN];
120 	bool is_egress = false;
121 	int err;
122 
123 	if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
124 		is_egress = true;
125 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
126 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
127 	if (err)
128 		return err;
129 
130 	mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
131 				    is_egress);
132 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
133 }
134 
135 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
136 				   struct mlxsw_sp_rif *rif,
137 				   enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
138 {
139 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
140 	unsigned int *p_counter_index;
141 	bool valid;
142 	int err;
143 
144 	valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
145 	if (!valid)
146 		return -EINVAL;
147 
148 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
149 	if (!p_counter_index)
150 		return -EINVAL;
151 	mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
152 			     MLXSW_REG_RICNT_OPCODE_NOP);
153 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
154 	if (err)
155 		return err;
156 	*cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
157 	return 0;
158 }
159 
160 static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
161 				      unsigned int counter_index)
162 {
163 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
164 
165 	mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
166 			     MLXSW_REG_RICNT_OPCODE_CLEAR);
167 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
168 }
169 
170 int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
171 			       struct mlxsw_sp_rif *rif,
172 			       enum mlxsw_sp_rif_counter_dir dir)
173 {
174 	unsigned int *p_counter_index;
175 	int err;
176 
177 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
178 	if (!p_counter_index)
179 		return -EINVAL;
180 	err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
181 				     p_counter_index);
182 	if (err)
183 		return err;
184 
185 	err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
186 	if (err)
187 		goto err_counter_clear;
188 
189 	err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
190 					*p_counter_index, true, dir);
191 	if (err)
192 		goto err_counter_edit;
193 	mlxsw_sp_rif_counter_valid_set(rif, dir, true);
194 	return 0;
195 
196 err_counter_edit:
197 err_counter_clear:
198 	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
199 			      *p_counter_index);
200 	return err;
201 }
202 
203 void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
204 			       struct mlxsw_sp_rif *rif,
205 			       enum mlxsw_sp_rif_counter_dir dir)
206 {
207 	unsigned int *p_counter_index;
208 
209 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
210 	if (WARN_ON(!p_counter_index))
211 		return;
212 	mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
213 				  *p_counter_index, false, dir);
214 	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
215 			      *p_counter_index);
216 	mlxsw_sp_rif_counter_valid_set(rif, dir, false);
217 }
218 
219 static struct mlxsw_sp_rif *
220 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
221 			 const struct net_device *dev);
222 
223 #define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
224 	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
225 
226 static bool
227 mlxsw_sp_prefix_usage_subset(struct mlxsw_sp_prefix_usage *prefix_usage1,
228 			     struct mlxsw_sp_prefix_usage *prefix_usage2)
229 {
230 	unsigned char prefix;
231 
232 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage1) {
233 		if (!test_bit(prefix, prefix_usage2->b))
234 			return false;
235 	}
236 	return true;
237 }
238 
239 static bool
240 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
241 			 struct mlxsw_sp_prefix_usage *prefix_usage2)
242 {
243 	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
244 }
245 
246 static bool
247 mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage)
248 {
249 	struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 } };
250 
251 	return mlxsw_sp_prefix_usage_eq(prefix_usage, &prefix_usage_none);
252 }
253 
254 static void
255 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
256 			  struct mlxsw_sp_prefix_usage *prefix_usage2)
257 {
258 	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
259 }
260 
261 static void
262 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
263 			  unsigned char prefix_len)
264 {
265 	set_bit(prefix_len, prefix_usage->b);
266 }
267 
268 static void
269 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
270 			    unsigned char prefix_len)
271 {
272 	clear_bit(prefix_len, prefix_usage->b);
273 }
274 
275 struct mlxsw_sp_fib_key {
276 	unsigned char addr[sizeof(struct in6_addr)];
277 	unsigned char prefix_len;
278 };
279 
280 enum mlxsw_sp_fib_entry_type {
281 	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
282 	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
283 	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
284 };
285 
286 struct mlxsw_sp_nexthop_group;
287 
288 struct mlxsw_sp_fib_node {
289 	struct list_head entry_list;
290 	struct list_head list;
291 	struct rhash_head ht_node;
292 	struct mlxsw_sp_fib *fib;
293 	struct mlxsw_sp_fib_key key;
294 };
295 
296 struct mlxsw_sp_fib_entry_params {
297 	u32 tb_id;
298 	u32 prio;
299 	u8 tos;
300 	u8 type;
301 };
302 
303 struct mlxsw_sp_fib_entry {
304 	struct list_head list;
305 	struct mlxsw_sp_fib_node *fib_node;
306 	enum mlxsw_sp_fib_entry_type type;
307 	struct list_head nexthop_group_node;
308 	struct mlxsw_sp_nexthop_group *nh_group;
309 	struct mlxsw_sp_fib_entry_params params;
310 	bool offloaded;
311 };
312 
313 struct mlxsw_sp_fib {
314 	struct rhashtable ht;
315 	struct list_head node_list;
316 	struct mlxsw_sp_vr *vr;
317 	struct mlxsw_sp_lpm_tree *lpm_tree;
318 	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
319 	struct mlxsw_sp_prefix_usage prefix_usage;
320 	enum mlxsw_sp_l3proto proto;
321 };
322 
323 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
324 
325 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp_vr *vr,
326 						enum mlxsw_sp_l3proto proto)
327 {
328 	struct mlxsw_sp_fib *fib;
329 	int err;
330 
331 	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
332 	if (!fib)
333 		return ERR_PTR(-ENOMEM);
334 	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
335 	if (err)
336 		goto err_rhashtable_init;
337 	INIT_LIST_HEAD(&fib->node_list);
338 	fib->proto = proto;
339 	fib->vr = vr;
340 	return fib;
341 
342 err_rhashtable_init:
343 	kfree(fib);
344 	return ERR_PTR(err);
345 }
346 
347 static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib)
348 {
349 	WARN_ON(!list_empty(&fib->node_list));
350 	WARN_ON(fib->lpm_tree);
351 	rhashtable_destroy(&fib->ht);
352 	kfree(fib);
353 }
354 
355 static struct mlxsw_sp_lpm_tree *
356 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
357 {
358 	static struct mlxsw_sp_lpm_tree *lpm_tree;
359 	int i;
360 
361 	for (i = 0; i < mlxsw_sp->router.lpm.tree_count; i++) {
362 		lpm_tree = &mlxsw_sp->router.lpm.trees[i];
363 		if (lpm_tree->ref_count == 0)
364 			return lpm_tree;
365 	}
366 	return NULL;
367 }
368 
369 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
370 				   struct mlxsw_sp_lpm_tree *lpm_tree)
371 {
372 	char ralta_pl[MLXSW_REG_RALTA_LEN];
373 
374 	mlxsw_reg_ralta_pack(ralta_pl, true,
375 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
376 			     lpm_tree->id);
377 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
378 }
379 
380 static int mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
381 				  struct mlxsw_sp_lpm_tree *lpm_tree)
382 {
383 	char ralta_pl[MLXSW_REG_RALTA_LEN];
384 
385 	mlxsw_reg_ralta_pack(ralta_pl, false,
386 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
387 			     lpm_tree->id);
388 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
389 }
390 
391 static int
392 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
393 				  struct mlxsw_sp_prefix_usage *prefix_usage,
394 				  struct mlxsw_sp_lpm_tree *lpm_tree)
395 {
396 	char ralst_pl[MLXSW_REG_RALST_LEN];
397 	u8 root_bin = 0;
398 	u8 prefix;
399 	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
400 
401 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
402 		root_bin = prefix;
403 
404 	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
405 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
406 		if (prefix == 0)
407 			continue;
408 		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
409 					 MLXSW_REG_RALST_BIN_NO_CHILD);
410 		last_prefix = prefix;
411 	}
412 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
413 }
414 
415 static struct mlxsw_sp_lpm_tree *
416 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
417 			 struct mlxsw_sp_prefix_usage *prefix_usage,
418 			 enum mlxsw_sp_l3proto proto)
419 {
420 	struct mlxsw_sp_lpm_tree *lpm_tree;
421 	int err;
422 
423 	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
424 	if (!lpm_tree)
425 		return ERR_PTR(-EBUSY);
426 	lpm_tree->proto = proto;
427 	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
428 	if (err)
429 		return ERR_PTR(err);
430 
431 	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
432 						lpm_tree);
433 	if (err)
434 		goto err_left_struct_set;
435 	memcpy(&lpm_tree->prefix_usage, prefix_usage,
436 	       sizeof(lpm_tree->prefix_usage));
437 	return lpm_tree;
438 
439 err_left_struct_set:
440 	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
441 	return ERR_PTR(err);
442 }
443 
444 static int mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
445 				     struct mlxsw_sp_lpm_tree *lpm_tree)
446 {
447 	return mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
448 }
449 
450 static struct mlxsw_sp_lpm_tree *
451 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
452 		      struct mlxsw_sp_prefix_usage *prefix_usage,
453 		      enum mlxsw_sp_l3proto proto)
454 {
455 	struct mlxsw_sp_lpm_tree *lpm_tree;
456 	int i;
457 
458 	for (i = 0; i < mlxsw_sp->router.lpm.tree_count; i++) {
459 		lpm_tree = &mlxsw_sp->router.lpm.trees[i];
460 		if (lpm_tree->ref_count != 0 &&
461 		    lpm_tree->proto == proto &&
462 		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
463 					     prefix_usage))
464 			goto inc_ref_count;
465 	}
466 	lpm_tree = mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage,
467 					    proto);
468 	if (IS_ERR(lpm_tree))
469 		return lpm_tree;
470 
471 inc_ref_count:
472 	lpm_tree->ref_count++;
473 	return lpm_tree;
474 }
475 
476 static int mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
477 				 struct mlxsw_sp_lpm_tree *lpm_tree)
478 {
479 	if (--lpm_tree->ref_count == 0)
480 		return mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
481 	return 0;
482 }
483 
484 #define MLXSW_SP_LPM_TREE_MIN 2 /* trees 0 and 1 are reserved */
485 
486 static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
487 {
488 	struct mlxsw_sp_lpm_tree *lpm_tree;
489 	u64 max_trees;
490 	int i;
491 
492 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
493 		return -EIO;
494 
495 	max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
496 	mlxsw_sp->router.lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
497 	mlxsw_sp->router.lpm.trees = kcalloc(mlxsw_sp->router.lpm.tree_count,
498 					     sizeof(struct mlxsw_sp_lpm_tree),
499 					     GFP_KERNEL);
500 	if (!mlxsw_sp->router.lpm.trees)
501 		return -ENOMEM;
502 
503 	for (i = 0; i < mlxsw_sp->router.lpm.tree_count; i++) {
504 		lpm_tree = &mlxsw_sp->router.lpm.trees[i];
505 		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
506 	}
507 
508 	return 0;
509 }
510 
511 static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
512 {
513 	kfree(mlxsw_sp->router.lpm.trees);
514 }
515 
516 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
517 {
518 	return !!vr->fib4;
519 }
520 
521 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
522 {
523 	struct mlxsw_sp_vr *vr;
524 	int i;
525 
526 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
527 		vr = &mlxsw_sp->router.vrs[i];
528 		if (!mlxsw_sp_vr_is_used(vr))
529 			return vr;
530 	}
531 	return NULL;
532 }
533 
534 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
535 				     const struct mlxsw_sp_fib *fib)
536 {
537 	char raltb_pl[MLXSW_REG_RALTB_LEN];
538 
539 	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
540 			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
541 			     fib->lpm_tree->id);
542 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
543 }
544 
545 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
546 				       const struct mlxsw_sp_fib *fib)
547 {
548 	char raltb_pl[MLXSW_REG_RALTB_LEN];
549 
550 	/* Bind to tree 0 which is default */
551 	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
552 			     (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
553 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
554 }
555 
556 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
557 {
558 	/* For our purpose, squash main and local table into one */
559 	if (tb_id == RT_TABLE_LOCAL)
560 		tb_id = RT_TABLE_MAIN;
561 	return tb_id;
562 }
563 
564 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
565 					    u32 tb_id)
566 {
567 	struct mlxsw_sp_vr *vr;
568 	int i;
569 
570 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
571 
572 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
573 		vr = &mlxsw_sp->router.vrs[i];
574 		if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
575 			return vr;
576 	}
577 	return NULL;
578 }
579 
580 static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
581 					    enum mlxsw_sp_l3proto proto)
582 {
583 	switch (proto) {
584 	case MLXSW_SP_L3_PROTO_IPV4:
585 		return vr->fib4;
586 	case MLXSW_SP_L3_PROTO_IPV6:
587 		BUG_ON(1);
588 	}
589 	return NULL;
590 }
591 
592 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
593 					      u32 tb_id)
594 {
595 	struct mlxsw_sp_vr *vr;
596 
597 	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
598 	if (!vr)
599 		return ERR_PTR(-EBUSY);
600 	vr->fib4 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV4);
601 	if (IS_ERR(vr->fib4))
602 		return ERR_CAST(vr->fib4);
603 	vr->tb_id = tb_id;
604 	return vr;
605 }
606 
607 static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr)
608 {
609 	mlxsw_sp_fib_destroy(vr->fib4);
610 	vr->fib4 = NULL;
611 }
612 
613 static int
614 mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib *fib,
615 			   struct mlxsw_sp_prefix_usage *req_prefix_usage)
616 {
617 	struct mlxsw_sp_lpm_tree *lpm_tree = fib->lpm_tree;
618 	struct mlxsw_sp_lpm_tree *new_tree;
619 	int err;
620 
621 	if (mlxsw_sp_prefix_usage_eq(req_prefix_usage, &lpm_tree->prefix_usage))
622 		return 0;
623 
624 	new_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage,
625 					 fib->proto);
626 	if (IS_ERR(new_tree)) {
627 		/* We failed to get a tree according to the required
628 		 * prefix usage. However, the current tree might be still good
629 		 * for us if our requirement is subset of the prefixes used
630 		 * in the tree.
631 		 */
632 		if (mlxsw_sp_prefix_usage_subset(req_prefix_usage,
633 						 &lpm_tree->prefix_usage))
634 			return 0;
635 		return PTR_ERR(new_tree);
636 	}
637 
638 	/* Prevent packet loss by overwriting existing binding */
639 	fib->lpm_tree = new_tree;
640 	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib);
641 	if (err)
642 		goto err_tree_bind;
643 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
644 
645 	return 0;
646 
647 err_tree_bind:
648 	fib->lpm_tree = lpm_tree;
649 	mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
650 	return err;
651 }
652 
653 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id)
654 {
655 	struct mlxsw_sp_vr *vr;
656 
657 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
658 	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
659 	if (!vr)
660 		vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id);
661 	return vr;
662 }
663 
664 static void mlxsw_sp_vr_put(struct mlxsw_sp_vr *vr)
665 {
666 	if (!vr->rif_count && list_empty(&vr->fib4->node_list))
667 		mlxsw_sp_vr_destroy(vr);
668 }
669 
670 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
671 {
672 	struct mlxsw_sp_vr *vr;
673 	u64 max_vrs;
674 	int i;
675 
676 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
677 		return -EIO;
678 
679 	max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
680 	mlxsw_sp->router.vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
681 				       GFP_KERNEL);
682 	if (!mlxsw_sp->router.vrs)
683 		return -ENOMEM;
684 
685 	for (i = 0; i < max_vrs; i++) {
686 		vr = &mlxsw_sp->router.vrs[i];
687 		vr->id = i;
688 	}
689 
690 	return 0;
691 }
692 
693 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
694 
695 static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
696 {
697 	/* At this stage we're guaranteed not to have new incoming
698 	 * FIB notifications and the work queue is free from FIBs
699 	 * sitting on top of mlxsw netdevs. However, we can still
700 	 * have other FIBs queued. Flush the queue before flushing
701 	 * the device's tables. No need for locks, as we're the only
702 	 * writer.
703 	 */
704 	mlxsw_core_flush_owq();
705 	mlxsw_sp_router_fib_flush(mlxsw_sp);
706 	kfree(mlxsw_sp->router.vrs);
707 }
708 
709 struct mlxsw_sp_neigh_key {
710 	struct neighbour *n;
711 };
712 
713 struct mlxsw_sp_neigh_entry {
714 	struct list_head rif_list_node;
715 	struct rhash_head ht_node;
716 	struct mlxsw_sp_neigh_key key;
717 	u16 rif;
718 	bool connected;
719 	unsigned char ha[ETH_ALEN];
720 	struct list_head nexthop_list; /* list of nexthops using
721 					* this neigh entry
722 					*/
723 	struct list_head nexthop_neighs_list_node;
724 };
725 
726 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
727 	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
728 	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
729 	.key_len = sizeof(struct mlxsw_sp_neigh_key),
730 };
731 
732 static struct mlxsw_sp_neigh_entry *
733 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
734 			   u16 rif)
735 {
736 	struct mlxsw_sp_neigh_entry *neigh_entry;
737 
738 	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
739 	if (!neigh_entry)
740 		return NULL;
741 
742 	neigh_entry->key.n = n;
743 	neigh_entry->rif = rif;
744 	INIT_LIST_HEAD(&neigh_entry->nexthop_list);
745 
746 	return neigh_entry;
747 }
748 
749 static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
750 {
751 	kfree(neigh_entry);
752 }
753 
754 static int
755 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
756 			    struct mlxsw_sp_neigh_entry *neigh_entry)
757 {
758 	return rhashtable_insert_fast(&mlxsw_sp->router.neigh_ht,
759 				      &neigh_entry->ht_node,
760 				      mlxsw_sp_neigh_ht_params);
761 }
762 
763 static void
764 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
765 			    struct mlxsw_sp_neigh_entry *neigh_entry)
766 {
767 	rhashtable_remove_fast(&mlxsw_sp->router.neigh_ht,
768 			       &neigh_entry->ht_node,
769 			       mlxsw_sp_neigh_ht_params);
770 }
771 
772 static struct mlxsw_sp_neigh_entry *
773 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
774 {
775 	struct mlxsw_sp_neigh_entry *neigh_entry;
776 	struct mlxsw_sp_rif *rif;
777 	int err;
778 
779 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
780 	if (!rif)
781 		return ERR_PTR(-EINVAL);
782 
783 	neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
784 	if (!neigh_entry)
785 		return ERR_PTR(-ENOMEM);
786 
787 	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
788 	if (err)
789 		goto err_neigh_entry_insert;
790 
791 	list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
792 
793 	return neigh_entry;
794 
795 err_neigh_entry_insert:
796 	mlxsw_sp_neigh_entry_free(neigh_entry);
797 	return ERR_PTR(err);
798 }
799 
800 static void
801 mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
802 			     struct mlxsw_sp_neigh_entry *neigh_entry)
803 {
804 	list_del(&neigh_entry->rif_list_node);
805 	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
806 	mlxsw_sp_neigh_entry_free(neigh_entry);
807 }
808 
809 static struct mlxsw_sp_neigh_entry *
810 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
811 {
812 	struct mlxsw_sp_neigh_key key;
813 
814 	key.n = n;
815 	return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht,
816 				      &key, mlxsw_sp_neigh_ht_params);
817 }
818 
819 static void
820 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
821 {
822 	unsigned long interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
823 
824 	mlxsw_sp->router.neighs_update.interval = jiffies_to_msecs(interval);
825 }
826 
827 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
828 						   char *rauhtd_pl,
829 						   int ent_index)
830 {
831 	struct net_device *dev;
832 	struct neighbour *n;
833 	__be32 dipn;
834 	u32 dip;
835 	u16 rif;
836 
837 	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
838 
839 	if (!mlxsw_sp->rifs[rif]) {
840 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
841 		return;
842 	}
843 
844 	dipn = htonl(dip);
845 	dev = mlxsw_sp->rifs[rif]->dev;
846 	n = neigh_lookup(&arp_tbl, &dipn, dev);
847 	if (!n) {
848 		netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n",
849 			   &dip);
850 		return;
851 	}
852 
853 	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
854 	neigh_event_send(n, NULL);
855 	neigh_release(n);
856 }
857 
858 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
859 						   char *rauhtd_pl,
860 						   int rec_index)
861 {
862 	u8 num_entries;
863 	int i;
864 
865 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
866 								rec_index);
867 	/* Hardware starts counting at 0, so add 1. */
868 	num_entries++;
869 
870 	/* Each record consists of several neighbour entries. */
871 	for (i = 0; i < num_entries; i++) {
872 		int ent_index;
873 
874 		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
875 		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
876 						       ent_index);
877 	}
878 
879 }
880 
881 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
882 					      char *rauhtd_pl, int rec_index)
883 {
884 	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
885 	case MLXSW_REG_RAUHTD_TYPE_IPV4:
886 		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
887 						       rec_index);
888 		break;
889 	case MLXSW_REG_RAUHTD_TYPE_IPV6:
890 		WARN_ON_ONCE(1);
891 		break;
892 	}
893 }
894 
895 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
896 {
897 	u8 num_rec, last_rec_index, num_entries;
898 
899 	num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
900 	last_rec_index = num_rec - 1;
901 
902 	if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
903 		return false;
904 	if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
905 	    MLXSW_REG_RAUHTD_TYPE_IPV6)
906 		return true;
907 
908 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
909 								last_rec_index);
910 	if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
911 		return true;
912 	return false;
913 }
914 
915 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
916 {
917 	char *rauhtd_pl;
918 	u8 num_rec;
919 	int i, err;
920 
921 	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
922 	if (!rauhtd_pl)
923 		return -ENOMEM;
924 
925 	/* Make sure the neighbour's netdev isn't removed in the
926 	 * process.
927 	 */
928 	rtnl_lock();
929 	do {
930 		mlxsw_reg_rauhtd_pack(rauhtd_pl, MLXSW_REG_RAUHTD_TYPE_IPV4);
931 		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
932 				      rauhtd_pl);
933 		if (err) {
934 			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour talbe\n");
935 			break;
936 		}
937 		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
938 		for (i = 0; i < num_rec; i++)
939 			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
940 							  i);
941 	} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
942 	rtnl_unlock();
943 
944 	kfree(rauhtd_pl);
945 	return err;
946 }
947 
948 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
949 {
950 	struct mlxsw_sp_neigh_entry *neigh_entry;
951 
952 	/* Take RTNL mutex here to prevent lists from changes */
953 	rtnl_lock();
954 	list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
955 			    nexthop_neighs_list_node)
956 		/* If this neigh have nexthops, make the kernel think this neigh
957 		 * is active regardless of the traffic.
958 		 */
959 		neigh_event_send(neigh_entry->key.n, NULL);
960 	rtnl_unlock();
961 }
962 
963 static void
964 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
965 {
966 	unsigned long interval = mlxsw_sp->router.neighs_update.interval;
967 
968 	mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw,
969 			       msecs_to_jiffies(interval));
970 }
971 
972 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
973 {
974 	struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
975 						 router.neighs_update.dw.work);
976 	int err;
977 
978 	err = mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp);
979 	if (err)
980 		dev_err(mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
981 
982 	mlxsw_sp_router_neighs_update_nh(mlxsw_sp);
983 
984 	mlxsw_sp_router_neighs_update_work_schedule(mlxsw_sp);
985 }
986 
987 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
988 {
989 	struct mlxsw_sp_neigh_entry *neigh_entry;
990 	struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
991 						 router.nexthop_probe_dw.work);
992 
993 	/* Iterate over nexthop neighbours, find those who are unresolved and
994 	 * send arp on them. This solves the chicken-egg problem when
995 	 * the nexthop wouldn't get offloaded until the neighbor is resolved
996 	 * but it wouldn't get resolved ever in case traffic is flowing in HW
997 	 * using different nexthop.
998 	 *
999 	 * Take RTNL mutex here to prevent lists from changes.
1000 	 */
1001 	rtnl_lock();
1002 	list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
1003 			    nexthop_neighs_list_node)
1004 		if (!neigh_entry->connected)
1005 			neigh_event_send(neigh_entry->key.n, NULL);
1006 	rtnl_unlock();
1007 
1008 	mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw,
1009 			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
1010 }
1011 
1012 static void
1013 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
1014 			      struct mlxsw_sp_neigh_entry *neigh_entry,
1015 			      bool removing);
1016 
1017 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
1018 {
1019 	return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
1020 			MLXSW_REG_RAUHT_OP_WRITE_DELETE;
1021 }
1022 
1023 static void
1024 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
1025 				struct mlxsw_sp_neigh_entry *neigh_entry,
1026 				enum mlxsw_reg_rauht_op op)
1027 {
1028 	struct neighbour *n = neigh_entry->key.n;
1029 	u32 dip = ntohl(*((__be32 *) n->primary_key));
1030 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
1031 
1032 	mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
1033 			      dip);
1034 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
1035 }
1036 
1037 static void
1038 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
1039 			    struct mlxsw_sp_neigh_entry *neigh_entry,
1040 			    bool adding)
1041 {
1042 	if (!adding && !neigh_entry->connected)
1043 		return;
1044 	neigh_entry->connected = adding;
1045 	if (neigh_entry->key.n->tbl == &arp_tbl)
1046 		mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
1047 						mlxsw_sp_rauht_op(adding));
1048 	else
1049 		WARN_ON_ONCE(1);
1050 }
1051 
1052 struct mlxsw_sp_neigh_event_work {
1053 	struct work_struct work;
1054 	struct mlxsw_sp *mlxsw_sp;
1055 	struct neighbour *n;
1056 };
1057 
1058 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
1059 {
1060 	struct mlxsw_sp_neigh_event_work *neigh_work =
1061 		container_of(work, struct mlxsw_sp_neigh_event_work, work);
1062 	struct mlxsw_sp *mlxsw_sp = neigh_work->mlxsw_sp;
1063 	struct mlxsw_sp_neigh_entry *neigh_entry;
1064 	struct neighbour *n = neigh_work->n;
1065 	unsigned char ha[ETH_ALEN];
1066 	bool entry_connected;
1067 	u8 nud_state, dead;
1068 
1069 	/* If these parameters are changed after we release the lock,
1070 	 * then we are guaranteed to receive another event letting us
1071 	 * know about it.
1072 	 */
1073 	read_lock_bh(&n->lock);
1074 	memcpy(ha, n->ha, ETH_ALEN);
1075 	nud_state = n->nud_state;
1076 	dead = n->dead;
1077 	read_unlock_bh(&n->lock);
1078 
1079 	rtnl_lock();
1080 	entry_connected = nud_state & NUD_VALID && !dead;
1081 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
1082 	if (!entry_connected && !neigh_entry)
1083 		goto out;
1084 	if (!neigh_entry) {
1085 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
1086 		if (IS_ERR(neigh_entry))
1087 			goto out;
1088 	}
1089 
1090 	memcpy(neigh_entry->ha, ha, ETH_ALEN);
1091 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
1092 	mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);
1093 
1094 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
1095 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
1096 
1097 out:
1098 	rtnl_unlock();
1099 	neigh_release(n);
1100 	kfree(neigh_work);
1101 }
1102 
1103 int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
1104 				   unsigned long event, void *ptr)
1105 {
1106 	struct mlxsw_sp_neigh_event_work *neigh_work;
1107 	struct mlxsw_sp_port *mlxsw_sp_port;
1108 	struct mlxsw_sp *mlxsw_sp;
1109 	unsigned long interval;
1110 	struct neigh_parms *p;
1111 	struct neighbour *n;
1112 
1113 	switch (event) {
1114 	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
1115 		p = ptr;
1116 
1117 		/* We don't care about changes in the default table. */
1118 		if (!p->dev || p->tbl != &arp_tbl)
1119 			return NOTIFY_DONE;
1120 
1121 		/* We are in atomic context and can't take RTNL mutex,
1122 		 * so use RCU variant to walk the device chain.
1123 		 */
1124 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
1125 		if (!mlxsw_sp_port)
1126 			return NOTIFY_DONE;
1127 
1128 		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
1129 		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
1130 		mlxsw_sp->router.neighs_update.interval = interval;
1131 
1132 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
1133 		break;
1134 	case NETEVENT_NEIGH_UPDATE:
1135 		n = ptr;
1136 
1137 		if (n->tbl != &arp_tbl)
1138 			return NOTIFY_DONE;
1139 
1140 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
1141 		if (!mlxsw_sp_port)
1142 			return NOTIFY_DONE;
1143 
1144 		neigh_work = kzalloc(sizeof(*neigh_work), GFP_ATOMIC);
1145 		if (!neigh_work) {
1146 			mlxsw_sp_port_dev_put(mlxsw_sp_port);
1147 			return NOTIFY_BAD;
1148 		}
1149 
1150 		INIT_WORK(&neigh_work->work, mlxsw_sp_router_neigh_event_work);
1151 		neigh_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
1152 		neigh_work->n = n;
1153 
1154 		/* Take a reference to ensure the neighbour won't be
1155 		 * destructed until we drop the reference in delayed
1156 		 * work.
1157 		 */
1158 		neigh_clone(n);
1159 		mlxsw_core_schedule_work(&neigh_work->work);
1160 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
1161 		break;
1162 	}
1163 
1164 	return NOTIFY_DONE;
1165 }
1166 
1167 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
1168 {
1169 	int err;
1170 
1171 	err = rhashtable_init(&mlxsw_sp->router.neigh_ht,
1172 			      &mlxsw_sp_neigh_ht_params);
1173 	if (err)
1174 		return err;
1175 
1176 	/* Initialize the polling interval according to the default
1177 	 * table.
1178 	 */
1179 	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
1180 
1181 	/* Create the delayed works for the activity_update */
1182 	INIT_DELAYED_WORK(&mlxsw_sp->router.neighs_update.dw,
1183 			  mlxsw_sp_router_neighs_update_work);
1184 	INIT_DELAYED_WORK(&mlxsw_sp->router.nexthop_probe_dw,
1185 			  mlxsw_sp_router_probe_unresolved_nexthops);
1186 	mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, 0);
1187 	mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, 0);
1188 	return 0;
1189 }
1190 
1191 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
1192 {
1193 	cancel_delayed_work_sync(&mlxsw_sp->router.neighs_update.dw);
1194 	cancel_delayed_work_sync(&mlxsw_sp->router.nexthop_probe_dw);
1195 	rhashtable_destroy(&mlxsw_sp->router.neigh_ht);
1196 }
1197 
1198 static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp,
1199 				    const struct mlxsw_sp_rif *rif)
1200 {
1201 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
1202 
1203 	mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL,
1204 			     rif->rif_index, rif->addr);
1205 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
1206 }
1207 
1208 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
1209 					 struct mlxsw_sp_rif *rif)
1210 {
1211 	struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
1212 
1213 	mlxsw_sp_neigh_rif_flush(mlxsw_sp, rif);
1214 	list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
1215 				 rif_list_node)
1216 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
1217 }
1218 
1219 struct mlxsw_sp_nexthop_key {
1220 	struct fib_nh *fib_nh;
1221 };
1222 
1223 struct mlxsw_sp_nexthop {
1224 	struct list_head neigh_list_node; /* member of neigh entry list */
1225 	struct list_head rif_list_node;
1226 	struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
1227 						* this belongs to
1228 						*/
1229 	struct rhash_head ht_node;
1230 	struct mlxsw_sp_nexthop_key key;
1231 	struct mlxsw_sp_rif *rif;
1232 	u8 should_offload:1, /* set indicates this neigh is connected and
1233 			      * should be put to KVD linear area of this group.
1234 			      */
1235 	   offloaded:1, /* set in case the neigh is actually put into
1236 			 * KVD linear area of this group.
1237 			 */
1238 	   update:1; /* set indicates that MAC of this neigh should be
1239 		      * updated in HW
1240 		      */
1241 	struct mlxsw_sp_neigh_entry *neigh_entry;
1242 };
1243 
1244 struct mlxsw_sp_nexthop_group_key {
1245 	struct fib_info *fi;
1246 };
1247 
1248 struct mlxsw_sp_nexthop_group {
1249 	struct rhash_head ht_node;
1250 	struct list_head fib_list; /* list of fib entries that use this group */
1251 	struct mlxsw_sp_nexthop_group_key key;
1252 	u8 adj_index_valid:1,
1253 	   gateway:1; /* routes using the group use a gateway */
1254 	u32 adj_index;
1255 	u16 ecmp_size;
1256 	u16 count;
1257 	struct mlxsw_sp_nexthop nexthops[0];
1258 #define nh_rif	nexthops[0].rif
1259 };
1260 
1261 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
1262 	.key_offset = offsetof(struct mlxsw_sp_nexthop_group, key),
1263 	.head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
1264 	.key_len = sizeof(struct mlxsw_sp_nexthop_group_key),
1265 };
1266 
1267 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
1268 					 struct mlxsw_sp_nexthop_group *nh_grp)
1269 {
1270 	return rhashtable_insert_fast(&mlxsw_sp->router.nexthop_group_ht,
1271 				      &nh_grp->ht_node,
1272 				      mlxsw_sp_nexthop_group_ht_params);
1273 }
1274 
1275 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
1276 					  struct mlxsw_sp_nexthop_group *nh_grp)
1277 {
1278 	rhashtable_remove_fast(&mlxsw_sp->router.nexthop_group_ht,
1279 			       &nh_grp->ht_node,
1280 			       mlxsw_sp_nexthop_group_ht_params);
1281 }
1282 
1283 static struct mlxsw_sp_nexthop_group *
1284 mlxsw_sp_nexthop_group_lookup(struct mlxsw_sp *mlxsw_sp,
1285 			      struct mlxsw_sp_nexthop_group_key key)
1286 {
1287 	return rhashtable_lookup_fast(&mlxsw_sp->router.nexthop_group_ht, &key,
1288 				      mlxsw_sp_nexthop_group_ht_params);
1289 }
1290 
1291 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
1292 	.key_offset = offsetof(struct mlxsw_sp_nexthop, key),
1293 	.head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
1294 	.key_len = sizeof(struct mlxsw_sp_nexthop_key),
1295 };
1296 
1297 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
1298 				   struct mlxsw_sp_nexthop *nh)
1299 {
1300 	return rhashtable_insert_fast(&mlxsw_sp->router.nexthop_ht,
1301 				      &nh->ht_node, mlxsw_sp_nexthop_ht_params);
1302 }
1303 
1304 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
1305 				    struct mlxsw_sp_nexthop *nh)
1306 {
1307 	rhashtable_remove_fast(&mlxsw_sp->router.nexthop_ht, &nh->ht_node,
1308 			       mlxsw_sp_nexthop_ht_params);
1309 }
1310 
1311 static struct mlxsw_sp_nexthop *
1312 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
1313 			struct mlxsw_sp_nexthop_key key)
1314 {
1315 	return rhashtable_lookup_fast(&mlxsw_sp->router.nexthop_ht, &key,
1316 				      mlxsw_sp_nexthop_ht_params);
1317 }
1318 
1319 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
1320 					     const struct mlxsw_sp_fib *fib,
1321 					     u32 adj_index, u16 ecmp_size,
1322 					     u32 new_adj_index,
1323 					     u16 new_ecmp_size)
1324 {
1325 	char raleu_pl[MLXSW_REG_RALEU_LEN];
1326 
1327 	mlxsw_reg_raleu_pack(raleu_pl,
1328 			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
1329 			     fib->vr->id, adj_index, ecmp_size, new_adj_index,
1330 			     new_ecmp_size);
1331 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
1332 }
1333 
1334 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
1335 					  struct mlxsw_sp_nexthop_group *nh_grp,
1336 					  u32 old_adj_index, u16 old_ecmp_size)
1337 {
1338 	struct mlxsw_sp_fib_entry *fib_entry;
1339 	struct mlxsw_sp_fib *fib = NULL;
1340 	int err;
1341 
1342 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
1343 		if (fib == fib_entry->fib_node->fib)
1344 			continue;
1345 		fib = fib_entry->fib_node->fib;
1346 		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
1347 							old_adj_index,
1348 							old_ecmp_size,
1349 							nh_grp->adj_index,
1350 							nh_grp->ecmp_size);
1351 		if (err)
1352 			return err;
1353 	}
1354 	return 0;
1355 }
1356 
1357 static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
1358 				       struct mlxsw_sp_nexthop *nh)
1359 {
1360 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
1361 	char ratr_pl[MLXSW_REG_RATR_LEN];
1362 
1363 	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
1364 			    true, adj_index, neigh_entry->rif);
1365 	mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
1366 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
1367 }
1368 
1369 static int
1370 mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp,
1371 				  struct mlxsw_sp_nexthop_group *nh_grp,
1372 				  bool reallocate)
1373 {
1374 	u32 adj_index = nh_grp->adj_index; /* base */
1375 	struct mlxsw_sp_nexthop *nh;
1376 	int i;
1377 	int err;
1378 
1379 	for (i = 0; i < nh_grp->count; i++) {
1380 		nh = &nh_grp->nexthops[i];
1381 
1382 		if (!nh->should_offload) {
1383 			nh->offloaded = 0;
1384 			continue;
1385 		}
1386 
1387 		if (nh->update || reallocate) {
1388 			err = mlxsw_sp_nexthop_mac_update(mlxsw_sp,
1389 							  adj_index, nh);
1390 			if (err)
1391 				return err;
1392 			nh->update = 0;
1393 			nh->offloaded = 1;
1394 		}
1395 		adj_index++;
1396 	}
1397 	return 0;
1398 }
1399 
1400 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1401 				     struct mlxsw_sp_fib_entry *fib_entry);
1402 
1403 static int
1404 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
1405 				    struct mlxsw_sp_nexthop_group *nh_grp)
1406 {
1407 	struct mlxsw_sp_fib_entry *fib_entry;
1408 	int err;
1409 
1410 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
1411 		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1412 		if (err)
1413 			return err;
1414 	}
1415 	return 0;
1416 }
1417 
1418 static void
1419 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
1420 			       struct mlxsw_sp_nexthop_group *nh_grp)
1421 {
1422 	struct mlxsw_sp_nexthop *nh;
1423 	bool offload_change = false;
1424 	u32 adj_index;
1425 	u16 ecmp_size = 0;
1426 	bool old_adj_index_valid;
1427 	u32 old_adj_index;
1428 	u16 old_ecmp_size;
1429 	int i;
1430 	int err;
1431 
1432 	if (!nh_grp->gateway) {
1433 		mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
1434 		return;
1435 	}
1436 
1437 	for (i = 0; i < nh_grp->count; i++) {
1438 		nh = &nh_grp->nexthops[i];
1439 
1440 		if (nh->should_offload ^ nh->offloaded) {
1441 			offload_change = true;
1442 			if (nh->should_offload)
1443 				nh->update = 1;
1444 		}
1445 		if (nh->should_offload)
1446 			ecmp_size++;
1447 	}
1448 	if (!offload_change) {
1449 		/* Nothing was added or removed, so no need to reallocate. Just
1450 		 * update MAC on existing adjacency indexes.
1451 		 */
1452 		err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp,
1453 							false);
1454 		if (err) {
1455 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
1456 			goto set_trap;
1457 		}
1458 		return;
1459 	}
1460 	if (!ecmp_size)
1461 		/* No neigh of this group is connected so we just set
1462 		 * the trap and let everthing flow through kernel.
1463 		 */
1464 		goto set_trap;
1465 
1466 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size, &adj_index);
1467 	if (err) {
1468 		/* We ran out of KVD linear space, just set the
1469 		 * trap and let everything flow through kernel.
1470 		 */
1471 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
1472 		goto set_trap;
1473 	}
1474 	old_adj_index_valid = nh_grp->adj_index_valid;
1475 	old_adj_index = nh_grp->adj_index;
1476 	old_ecmp_size = nh_grp->ecmp_size;
1477 	nh_grp->adj_index_valid = 1;
1478 	nh_grp->adj_index = adj_index;
1479 	nh_grp->ecmp_size = ecmp_size;
1480 	err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp, true);
1481 	if (err) {
1482 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
1483 		goto set_trap;
1484 	}
1485 
1486 	if (!old_adj_index_valid) {
1487 		/* The trap was set for fib entries, so we have to call
1488 		 * fib entry update to unset it and use adjacency index.
1489 		 */
1490 		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
1491 		if (err) {
1492 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
1493 			goto set_trap;
1494 		}
1495 		return;
1496 	}
1497 
1498 	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
1499 					     old_adj_index, old_ecmp_size);
1500 	mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
1501 	if (err) {
1502 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
1503 		goto set_trap;
1504 	}
1505 	return;
1506 
1507 set_trap:
1508 	old_adj_index_valid = nh_grp->adj_index_valid;
1509 	nh_grp->adj_index_valid = 0;
1510 	for (i = 0; i < nh_grp->count; i++) {
1511 		nh = &nh_grp->nexthops[i];
1512 		nh->offloaded = 0;
1513 	}
1514 	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
1515 	if (err)
1516 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
1517 	if (old_adj_index_valid)
1518 		mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
1519 }
1520 
1521 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
1522 					    bool removing)
1523 {
1524 	if (!removing && !nh->should_offload)
1525 		nh->should_offload = 1;
1526 	else if (removing && nh->offloaded)
1527 		nh->should_offload = 0;
1528 	nh->update = 1;
1529 }
1530 
1531 static void
1532 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
1533 			      struct mlxsw_sp_neigh_entry *neigh_entry,
1534 			      bool removing)
1535 {
1536 	struct mlxsw_sp_nexthop *nh;
1537 
1538 	list_for_each_entry(nh, &neigh_entry->nexthop_list,
1539 			    neigh_list_node) {
1540 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
1541 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
1542 	}
1543 }
1544 
1545 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
1546 				      struct mlxsw_sp_rif *rif)
1547 {
1548 	if (nh->rif)
1549 		return;
1550 
1551 	nh->rif = rif;
1552 	list_add(&nh->rif_list_node, &rif->nexthop_list);
1553 }
1554 
1555 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
1556 {
1557 	if (!nh->rif)
1558 		return;
1559 
1560 	list_del(&nh->rif_list_node);
1561 	nh->rif = NULL;
1562 }
1563 
1564 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
1565 				       struct mlxsw_sp_nexthop *nh)
1566 {
1567 	struct mlxsw_sp_neigh_entry *neigh_entry;
1568 	struct fib_nh *fib_nh = nh->key.fib_nh;
1569 	struct neighbour *n;
1570 	u8 nud_state, dead;
1571 	int err;
1572 
1573 	if (!nh->nh_grp->gateway || nh->neigh_entry)
1574 		return 0;
1575 
1576 	/* Take a reference of neigh here ensuring that neigh would
1577 	 * not be detructed before the nexthop entry is finished.
1578 	 * The reference is taken either in neigh_lookup() or
1579 	 * in neigh_create() in case n is not found.
1580 	 */
1581 	n = neigh_lookup(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev);
1582 	if (!n) {
1583 		n = neigh_create(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev);
1584 		if (IS_ERR(n))
1585 			return PTR_ERR(n);
1586 		neigh_event_send(n, NULL);
1587 	}
1588 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
1589 	if (!neigh_entry) {
1590 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
1591 		if (IS_ERR(neigh_entry)) {
1592 			err = -EINVAL;
1593 			goto err_neigh_entry_create;
1594 		}
1595 	}
1596 
1597 	/* If that is the first nexthop connected to that neigh, add to
1598 	 * nexthop_neighs_list
1599 	 */
1600 	if (list_empty(&neigh_entry->nexthop_list))
1601 		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
1602 			      &mlxsw_sp->router.nexthop_neighs_list);
1603 
1604 	nh->neigh_entry = neigh_entry;
1605 	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
1606 	read_lock_bh(&n->lock);
1607 	nud_state = n->nud_state;
1608 	dead = n->dead;
1609 	read_unlock_bh(&n->lock);
1610 	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
1611 
1612 	return 0;
1613 
1614 err_neigh_entry_create:
1615 	neigh_release(n);
1616 	return err;
1617 }
1618 
1619 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
1620 					struct mlxsw_sp_nexthop *nh)
1621 {
1622 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
1623 	struct neighbour *n;
1624 
1625 	if (!neigh_entry)
1626 		return;
1627 	n = neigh_entry->key.n;
1628 
1629 	__mlxsw_sp_nexthop_neigh_update(nh, true);
1630 	list_del(&nh->neigh_list_node);
1631 	nh->neigh_entry = NULL;
1632 
1633 	/* If that is the last nexthop connected to that neigh, remove from
1634 	 * nexthop_neighs_list
1635 	 */
1636 	if (list_empty(&neigh_entry->nexthop_list))
1637 		list_del(&neigh_entry->nexthop_neighs_list_node);
1638 
1639 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
1640 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
1641 
1642 	neigh_release(n);
1643 }
1644 
1645 static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
1646 				 struct mlxsw_sp_nexthop_group *nh_grp,
1647 				 struct mlxsw_sp_nexthop *nh,
1648 				 struct fib_nh *fib_nh)
1649 {
1650 	struct net_device *dev = fib_nh->nh_dev;
1651 	struct in_device *in_dev;
1652 	struct mlxsw_sp_rif *rif;
1653 	int err;
1654 
1655 	nh->nh_grp = nh_grp;
1656 	nh->key.fib_nh = fib_nh;
1657 	err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
1658 	if (err)
1659 		return err;
1660 
1661 	if (!dev)
1662 		return 0;
1663 
1664 	in_dev = __in_dev_get_rtnl(dev);
1665 	if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
1666 	    fib_nh->nh_flags & RTNH_F_LINKDOWN)
1667 		return 0;
1668 
1669 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
1670 	if (!rif)
1671 		return 0;
1672 	mlxsw_sp_nexthop_rif_init(nh, rif);
1673 
1674 	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
1675 	if (err)
1676 		goto err_nexthop_neigh_init;
1677 
1678 	return 0;
1679 
1680 err_nexthop_neigh_init:
1681 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
1682 	return err;
1683 }
1684 
1685 static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp,
1686 				  struct mlxsw_sp_nexthop *nh)
1687 {
1688 	mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
1689 	mlxsw_sp_nexthop_rif_fini(nh);
1690 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
1691 }
1692 
1693 static void mlxsw_sp_nexthop_event(struct mlxsw_sp *mlxsw_sp,
1694 				   unsigned long event, struct fib_nh *fib_nh)
1695 {
1696 	struct mlxsw_sp_nexthop_key key;
1697 	struct mlxsw_sp_nexthop *nh;
1698 	struct mlxsw_sp_rif *rif;
1699 
1700 	if (mlxsw_sp->router.aborted)
1701 		return;
1702 
1703 	key.fib_nh = fib_nh;
1704 	nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
1705 	if (WARN_ON_ONCE(!nh))
1706 		return;
1707 
1708 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, fib_nh->nh_dev);
1709 	if (!rif)
1710 		return;
1711 
1712 	switch (event) {
1713 	case FIB_EVENT_NH_ADD:
1714 		mlxsw_sp_nexthop_rif_init(nh, rif);
1715 		mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
1716 		break;
1717 	case FIB_EVENT_NH_DEL:
1718 		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
1719 		mlxsw_sp_nexthop_rif_fini(nh);
1720 		break;
1721 	}
1722 
1723 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
1724 }
1725 
1726 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
1727 					   struct mlxsw_sp_rif *rif)
1728 {
1729 	struct mlxsw_sp_nexthop *nh, *tmp;
1730 
1731 	list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
1732 		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
1733 		mlxsw_sp_nexthop_rif_fini(nh);
1734 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
1735 	}
1736 }
1737 
1738 static struct mlxsw_sp_nexthop_group *
1739 mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
1740 {
1741 	struct mlxsw_sp_nexthop_group *nh_grp;
1742 	struct mlxsw_sp_nexthop *nh;
1743 	struct fib_nh *fib_nh;
1744 	size_t alloc_size;
1745 	int i;
1746 	int err;
1747 
1748 	alloc_size = sizeof(*nh_grp) +
1749 		     fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
1750 	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
1751 	if (!nh_grp)
1752 		return ERR_PTR(-ENOMEM);
1753 	INIT_LIST_HEAD(&nh_grp->fib_list);
1754 	nh_grp->gateway = fi->fib_nh->nh_scope == RT_SCOPE_LINK;
1755 	nh_grp->count = fi->fib_nhs;
1756 	nh_grp->key.fi = fi;
1757 	for (i = 0; i < nh_grp->count; i++) {
1758 		nh = &nh_grp->nexthops[i];
1759 		fib_nh = &fi->fib_nh[i];
1760 		err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh);
1761 		if (err)
1762 			goto err_nexthop_init;
1763 	}
1764 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
1765 	if (err)
1766 		goto err_nexthop_group_insert;
1767 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
1768 	return nh_grp;
1769 
1770 err_nexthop_group_insert:
1771 err_nexthop_init:
1772 	for (i--; i >= 0; i--) {
1773 		nh = &nh_grp->nexthops[i];
1774 		mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
1775 	}
1776 	kfree(nh_grp);
1777 	return ERR_PTR(err);
1778 }
1779 
1780 static void
1781 mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp,
1782 			       struct mlxsw_sp_nexthop_group *nh_grp)
1783 {
1784 	struct mlxsw_sp_nexthop *nh;
1785 	int i;
1786 
1787 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
1788 	for (i = 0; i < nh_grp->count; i++) {
1789 		nh = &nh_grp->nexthops[i];
1790 		mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
1791 	}
1792 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
1793 	WARN_ON_ONCE(nh_grp->adj_index_valid);
1794 	kfree(nh_grp);
1795 }
1796 
1797 static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp,
1798 				      struct mlxsw_sp_fib_entry *fib_entry,
1799 				      struct fib_info *fi)
1800 {
1801 	struct mlxsw_sp_nexthop_group_key key;
1802 	struct mlxsw_sp_nexthop_group *nh_grp;
1803 
1804 	key.fi = fi;
1805 	nh_grp = mlxsw_sp_nexthop_group_lookup(mlxsw_sp, key);
1806 	if (!nh_grp) {
1807 		nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi);
1808 		if (IS_ERR(nh_grp))
1809 			return PTR_ERR(nh_grp);
1810 	}
1811 	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
1812 	fib_entry->nh_group = nh_grp;
1813 	return 0;
1814 }
1815 
1816 static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp,
1817 				       struct mlxsw_sp_fib_entry *fib_entry)
1818 {
1819 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
1820 
1821 	list_del(&fib_entry->nexthop_group_node);
1822 	if (!list_empty(&nh_grp->fib_list))
1823 		return;
1824 	mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp);
1825 }
1826 
1827 static bool
1828 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
1829 {
1830 	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
1831 
1832 	if (fib_entry->params.tos)
1833 		return false;
1834 
1835 	switch (fib_entry->type) {
1836 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
1837 		return !!nh_group->adj_index_valid;
1838 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
1839 		return !!nh_group->nh_rif;
1840 	default:
1841 		return false;
1842 	}
1843 }
1844 
1845 static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
1846 {
1847 	fib_entry->offloaded = true;
1848 
1849 	switch (fib_entry->fib_node->fib->proto) {
1850 	case MLXSW_SP_L3_PROTO_IPV4:
1851 		fib_info_offload_inc(fib_entry->nh_group->key.fi);
1852 		break;
1853 	case MLXSW_SP_L3_PROTO_IPV6:
1854 		WARN_ON_ONCE(1);
1855 	}
1856 }
1857 
1858 static void
1859 mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
1860 {
1861 	switch (fib_entry->fib_node->fib->proto) {
1862 	case MLXSW_SP_L3_PROTO_IPV4:
1863 		fib_info_offload_dec(fib_entry->nh_group->key.fi);
1864 		break;
1865 	case MLXSW_SP_L3_PROTO_IPV6:
1866 		WARN_ON_ONCE(1);
1867 	}
1868 
1869 	fib_entry->offloaded = false;
1870 }
1871 
1872 static void
1873 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
1874 				   enum mlxsw_reg_ralue_op op, int err)
1875 {
1876 	switch (op) {
1877 	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
1878 		if (!fib_entry->offloaded)
1879 			return;
1880 		return mlxsw_sp_fib_entry_offload_unset(fib_entry);
1881 	case MLXSW_REG_RALUE_OP_WRITE_WRITE:
1882 		if (err)
1883 			return;
1884 		if (mlxsw_sp_fib_entry_should_offload(fib_entry) &&
1885 		    !fib_entry->offloaded)
1886 			mlxsw_sp_fib_entry_offload_set(fib_entry);
1887 		else if (!mlxsw_sp_fib_entry_should_offload(fib_entry) &&
1888 			 fib_entry->offloaded)
1889 			mlxsw_sp_fib_entry_offload_unset(fib_entry);
1890 		return;
1891 	default:
1892 		return;
1893 	}
1894 }
1895 
1896 static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp,
1897 					 struct mlxsw_sp_fib_entry *fib_entry,
1898 					 enum mlxsw_reg_ralue_op op)
1899 {
1900 	char ralue_pl[MLXSW_REG_RALUE_LEN];
1901 	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
1902 	u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
1903 	enum mlxsw_reg_ralue_trap_action trap_action;
1904 	u16 trap_id = 0;
1905 	u32 adjacency_index = 0;
1906 	u16 ecmp_size = 0;
1907 
1908 	/* In case the nexthop group adjacency index is valid, use it
1909 	 * with provided ECMP size. Otherwise, setup trap and pass
1910 	 * traffic to kernel.
1911 	 */
1912 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
1913 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
1914 		adjacency_index = fib_entry->nh_group->adj_index;
1915 		ecmp_size = fib_entry->nh_group->ecmp_size;
1916 	} else {
1917 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
1918 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
1919 	}
1920 
1921 	mlxsw_reg_ralue_pack4(ralue_pl,
1922 			      (enum mlxsw_reg_ralxx_protocol) fib->proto, op,
1923 			      fib->vr->id, fib_entry->fib_node->key.prefix_len,
1924 			      *p_dip);
1925 	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
1926 					adjacency_index, ecmp_size);
1927 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
1928 }
1929 
1930 static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp,
1931 					struct mlxsw_sp_fib_entry *fib_entry,
1932 					enum mlxsw_reg_ralue_op op)
1933 {
1934 	struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
1935 	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
1936 	enum mlxsw_reg_ralue_trap_action trap_action;
1937 	char ralue_pl[MLXSW_REG_RALUE_LEN];
1938 	u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
1939 	u16 trap_id = 0;
1940 	u16 rif_index = 0;
1941 
1942 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
1943 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
1944 		rif_index = rif->rif_index;
1945 	} else {
1946 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
1947 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
1948 	}
1949 
1950 	mlxsw_reg_ralue_pack4(ralue_pl,
1951 			      (enum mlxsw_reg_ralxx_protocol) fib->proto, op,
1952 			      fib->vr->id, fib_entry->fib_node->key.prefix_len,
1953 			      *p_dip);
1954 	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
1955 				       rif_index);
1956 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
1957 }
1958 
1959 static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp,
1960 				       struct mlxsw_sp_fib_entry *fib_entry,
1961 				       enum mlxsw_reg_ralue_op op)
1962 {
1963 	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
1964 	char ralue_pl[MLXSW_REG_RALUE_LEN];
1965 	u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
1966 
1967 	mlxsw_reg_ralue_pack4(ralue_pl,
1968 			      (enum mlxsw_reg_ralxx_protocol) fib->proto, op,
1969 			      fib->vr->id, fib_entry->fib_node->key.prefix_len,
1970 			      *p_dip);
1971 	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
1972 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
1973 }
1974 
1975 static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp,
1976 				  struct mlxsw_sp_fib_entry *fib_entry,
1977 				  enum mlxsw_reg_ralue_op op)
1978 {
1979 	switch (fib_entry->type) {
1980 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
1981 		return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op);
1982 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
1983 		return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op);
1984 	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
1985 		return mlxsw_sp_fib_entry_op4_trap(mlxsw_sp, fib_entry, op);
1986 	}
1987 	return -EINVAL;
1988 }
1989 
1990 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
1991 				 struct mlxsw_sp_fib_entry *fib_entry,
1992 				 enum mlxsw_reg_ralue_op op)
1993 {
1994 	int err = -EINVAL;
1995 
1996 	switch (fib_entry->fib_node->fib->proto) {
1997 	case MLXSW_SP_L3_PROTO_IPV4:
1998 		err = mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op);
1999 		break;
2000 	case MLXSW_SP_L3_PROTO_IPV6:
2001 		return err;
2002 	}
2003 	mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
2004 	return err;
2005 }
2006 
2007 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
2008 				     struct mlxsw_sp_fib_entry *fib_entry)
2009 {
2010 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
2011 				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
2012 }
2013 
2014 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
2015 				  struct mlxsw_sp_fib_entry *fib_entry)
2016 {
2017 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
2018 				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
2019 }
2020 
2021 static int
2022 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
2023 			     const struct fib_entry_notifier_info *fen_info,
2024 			     struct mlxsw_sp_fib_entry *fib_entry)
2025 {
2026 	struct fib_info *fi = fen_info->fi;
2027 
2028 	switch (fen_info->type) {
2029 	case RTN_BROADCAST: /* fall through */
2030 	case RTN_LOCAL:
2031 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
2032 		return 0;
2033 	case RTN_UNREACHABLE: /* fall through */
2034 	case RTN_BLACKHOLE: /* fall through */
2035 	case RTN_PROHIBIT:
2036 		/* Packets hitting these routes need to be trapped, but
2037 		 * can do so with a lower priority than packets directed
2038 		 * at the host, so use action type local instead of trap.
2039 		 */
2040 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
2041 		return 0;
2042 	case RTN_UNICAST:
2043 		if (fi->fib_nh->nh_scope != RT_SCOPE_LINK)
2044 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
2045 		else
2046 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
2047 		return 0;
2048 	default:
2049 		return -EINVAL;
2050 	}
2051 }
2052 
2053 static struct mlxsw_sp_fib_entry *
2054 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
2055 			   struct mlxsw_sp_fib_node *fib_node,
2056 			   const struct fib_entry_notifier_info *fen_info)
2057 {
2058 	struct mlxsw_sp_fib_entry *fib_entry;
2059 	int err;
2060 
2061 	fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL);
2062 	if (!fib_entry) {
2063 		err = -ENOMEM;
2064 		goto err_fib_entry_alloc;
2065 	}
2066 
2067 	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
2068 	if (err)
2069 		goto err_fib4_entry_type_set;
2070 
2071 	err = mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fen_info->fi);
2072 	if (err)
2073 		goto err_nexthop_group_get;
2074 
2075 	fib_entry->params.prio = fen_info->fi->fib_priority;
2076 	fib_entry->params.tb_id = fen_info->tb_id;
2077 	fib_entry->params.type = fen_info->type;
2078 	fib_entry->params.tos = fen_info->tos;
2079 
2080 	fib_entry->fib_node = fib_node;
2081 
2082 	return fib_entry;
2083 
2084 err_nexthop_group_get:
2085 err_fib4_entry_type_set:
2086 	kfree(fib_entry);
2087 err_fib_entry_alloc:
2088 	return ERR_PTR(err);
2089 }
2090 
2091 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
2092 					struct mlxsw_sp_fib_entry *fib_entry)
2093 {
2094 	mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry);
2095 	kfree(fib_entry);
2096 }
2097 
2098 static struct mlxsw_sp_fib_node *
2099 mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp,
2100 		       const struct fib_entry_notifier_info *fen_info);
2101 
2102 static struct mlxsw_sp_fib_entry *
2103 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
2104 			   const struct fib_entry_notifier_info *fen_info)
2105 {
2106 	struct mlxsw_sp_fib_entry *fib_entry;
2107 	struct mlxsw_sp_fib_node *fib_node;
2108 
2109 	fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info);
2110 	if (IS_ERR(fib_node))
2111 		return NULL;
2112 
2113 	list_for_each_entry(fib_entry, &fib_node->entry_list, list) {
2114 		if (fib_entry->params.tb_id == fen_info->tb_id &&
2115 		    fib_entry->params.tos == fen_info->tos &&
2116 		    fib_entry->params.type == fen_info->type &&
2117 		    fib_entry->nh_group->key.fi == fen_info->fi) {
2118 			return fib_entry;
2119 		}
2120 	}
2121 
2122 	return NULL;
2123 }
2124 
2125 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
2126 	.key_offset = offsetof(struct mlxsw_sp_fib_node, key),
2127 	.head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
2128 	.key_len = sizeof(struct mlxsw_sp_fib_key),
2129 	.automatic_shrinking = true,
2130 };
2131 
2132 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
2133 				    struct mlxsw_sp_fib_node *fib_node)
2134 {
2135 	return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
2136 				      mlxsw_sp_fib_ht_params);
2137 }
2138 
2139 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
2140 				     struct mlxsw_sp_fib_node *fib_node)
2141 {
2142 	rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
2143 			       mlxsw_sp_fib_ht_params);
2144 }
2145 
2146 static struct mlxsw_sp_fib_node *
2147 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
2148 			 size_t addr_len, unsigned char prefix_len)
2149 {
2150 	struct mlxsw_sp_fib_key key;
2151 
2152 	memset(&key, 0, sizeof(key));
2153 	memcpy(key.addr, addr, addr_len);
2154 	key.prefix_len = prefix_len;
2155 	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
2156 }
2157 
2158 static struct mlxsw_sp_fib_node *
2159 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
2160 			 size_t addr_len, unsigned char prefix_len)
2161 {
2162 	struct mlxsw_sp_fib_node *fib_node;
2163 
2164 	fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
2165 	if (!fib_node)
2166 		return NULL;
2167 
2168 	INIT_LIST_HEAD(&fib_node->entry_list);
2169 	list_add(&fib_node->list, &fib->node_list);
2170 	memcpy(fib_node->key.addr, addr, addr_len);
2171 	fib_node->key.prefix_len = prefix_len;
2172 
2173 	return fib_node;
2174 }
2175 
2176 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
2177 {
2178 	list_del(&fib_node->list);
2179 	WARN_ON(!list_empty(&fib_node->entry_list));
2180 	kfree(fib_node);
2181 }
2182 
2183 static bool
2184 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
2185 				 const struct mlxsw_sp_fib_entry *fib_entry)
2186 {
2187 	return list_first_entry(&fib_node->entry_list,
2188 				struct mlxsw_sp_fib_entry, list) == fib_entry;
2189 }
2190 
2191 static void mlxsw_sp_fib_node_prefix_inc(struct mlxsw_sp_fib_node *fib_node)
2192 {
2193 	unsigned char prefix_len = fib_node->key.prefix_len;
2194 	struct mlxsw_sp_fib *fib = fib_node->fib;
2195 
2196 	if (fib->prefix_ref_count[prefix_len]++ == 0)
2197 		mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len);
2198 }
2199 
2200 static void mlxsw_sp_fib_node_prefix_dec(struct mlxsw_sp_fib_node *fib_node)
2201 {
2202 	unsigned char prefix_len = fib_node->key.prefix_len;
2203 	struct mlxsw_sp_fib *fib = fib_node->fib;
2204 
2205 	if (--fib->prefix_ref_count[prefix_len] == 0)
2206 		mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len);
2207 }
2208 
2209 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
2210 				  struct mlxsw_sp_fib_node *fib_node,
2211 				  struct mlxsw_sp_fib *fib)
2212 {
2213 	struct mlxsw_sp_prefix_usage req_prefix_usage;
2214 	struct mlxsw_sp_lpm_tree *lpm_tree;
2215 	int err;
2216 
2217 	err = mlxsw_sp_fib_node_insert(fib, fib_node);
2218 	if (err)
2219 		return err;
2220 	fib_node->fib = fib;
2221 
2222 	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &fib->prefix_usage);
2223 	mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
2224 
2225 	if (!mlxsw_sp_prefix_usage_none(&fib->prefix_usage)) {
2226 		err = mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, fib,
2227 						 &req_prefix_usage);
2228 		if (err)
2229 			goto err_tree_check;
2230 	} else {
2231 		lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
2232 						 fib->proto);
2233 		if (IS_ERR(lpm_tree))
2234 			return PTR_ERR(lpm_tree);
2235 		fib->lpm_tree = lpm_tree;
2236 		err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib);
2237 		if (err)
2238 			goto err_tree_bind;
2239 	}
2240 
2241 	mlxsw_sp_fib_node_prefix_inc(fib_node);
2242 
2243 	return 0;
2244 
2245 err_tree_bind:
2246 	fib->lpm_tree = NULL;
2247 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
2248 err_tree_check:
2249 	fib_node->fib = NULL;
2250 	mlxsw_sp_fib_node_remove(fib, fib_node);
2251 	return err;
2252 }
2253 
2254 static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
2255 				   struct mlxsw_sp_fib_node *fib_node)
2256 {
2257 	struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
2258 	struct mlxsw_sp_fib *fib = fib_node->fib;
2259 
2260 	mlxsw_sp_fib_node_prefix_dec(fib_node);
2261 
2262 	if (mlxsw_sp_prefix_usage_none(&fib->prefix_usage)) {
2263 		mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
2264 		fib->lpm_tree = NULL;
2265 		mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
2266 	} else {
2267 		mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, fib, &fib->prefix_usage);
2268 	}
2269 
2270 	fib_node->fib = NULL;
2271 	mlxsw_sp_fib_node_remove(fib, fib_node);
2272 }
2273 
2274 static struct mlxsw_sp_fib_node *
2275 mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp,
2276 		       const struct fib_entry_notifier_info *fen_info)
2277 {
2278 	struct mlxsw_sp_fib_node *fib_node;
2279 	struct mlxsw_sp_fib *fib;
2280 	struct mlxsw_sp_vr *vr;
2281 	int err;
2282 
2283 	vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->tb_id);
2284 	if (IS_ERR(vr))
2285 		return ERR_CAST(vr);
2286 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
2287 
2288 	fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
2289 					    sizeof(fen_info->dst),
2290 					    fen_info->dst_len);
2291 	if (fib_node)
2292 		return fib_node;
2293 
2294 	fib_node = mlxsw_sp_fib_node_create(fib, &fen_info->dst,
2295 					    sizeof(fen_info->dst),
2296 					    fen_info->dst_len);
2297 	if (!fib_node) {
2298 		err = -ENOMEM;
2299 		goto err_fib_node_create;
2300 	}
2301 
2302 	err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
2303 	if (err)
2304 		goto err_fib_node_init;
2305 
2306 	return fib_node;
2307 
2308 err_fib_node_init:
2309 	mlxsw_sp_fib_node_destroy(fib_node);
2310 err_fib_node_create:
2311 	mlxsw_sp_vr_put(vr);
2312 	return ERR_PTR(err);
2313 }
2314 
2315 static void mlxsw_sp_fib4_node_put(struct mlxsw_sp *mlxsw_sp,
2316 				   struct mlxsw_sp_fib_node *fib_node)
2317 {
2318 	struct mlxsw_sp_vr *vr = fib_node->fib->vr;
2319 
2320 	if (!list_empty(&fib_node->entry_list))
2321 		return;
2322 	mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
2323 	mlxsw_sp_fib_node_destroy(fib_node);
2324 	mlxsw_sp_vr_put(vr);
2325 }
2326 
2327 static struct mlxsw_sp_fib_entry *
2328 mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
2329 			      const struct mlxsw_sp_fib_entry_params *params)
2330 {
2331 	struct mlxsw_sp_fib_entry *fib_entry;
2332 
2333 	list_for_each_entry(fib_entry, &fib_node->entry_list, list) {
2334 		if (fib_entry->params.tb_id > params->tb_id)
2335 			continue;
2336 		if (fib_entry->params.tb_id != params->tb_id)
2337 			break;
2338 		if (fib_entry->params.tos > params->tos)
2339 			continue;
2340 		if (fib_entry->params.prio >= params->prio ||
2341 		    fib_entry->params.tos < params->tos)
2342 			return fib_entry;
2343 	}
2344 
2345 	return NULL;
2346 }
2347 
2348 static int mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib_entry *fib_entry,
2349 					  struct mlxsw_sp_fib_entry *new_entry)
2350 {
2351 	struct mlxsw_sp_fib_node *fib_node;
2352 
2353 	if (WARN_ON(!fib_entry))
2354 		return -EINVAL;
2355 
2356 	fib_node = fib_entry->fib_node;
2357 	list_for_each_entry_from(fib_entry, &fib_node->entry_list, list) {
2358 		if (fib_entry->params.tb_id != new_entry->params.tb_id ||
2359 		    fib_entry->params.tos != new_entry->params.tos ||
2360 		    fib_entry->params.prio != new_entry->params.prio)
2361 			break;
2362 	}
2363 
2364 	list_add_tail(&new_entry->list, &fib_entry->list);
2365 	return 0;
2366 }
2367 
2368 static int
2369 mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib_node *fib_node,
2370 			       struct mlxsw_sp_fib_entry *new_entry,
2371 			       bool replace, bool append)
2372 {
2373 	struct mlxsw_sp_fib_entry *fib_entry;
2374 
2375 	fib_entry = mlxsw_sp_fib4_node_entry_find(fib_node, &new_entry->params);
2376 
2377 	if (append)
2378 		return mlxsw_sp_fib4_node_list_append(fib_entry, new_entry);
2379 	if (replace && WARN_ON(!fib_entry))
2380 		return -EINVAL;
2381 
2382 	/* Insert new entry before replaced one, so that we can later
2383 	 * remove the second.
2384 	 */
2385 	if (fib_entry) {
2386 		list_add_tail(&new_entry->list, &fib_entry->list);
2387 	} else {
2388 		struct mlxsw_sp_fib_entry *last;
2389 
2390 		list_for_each_entry(last, &fib_node->entry_list, list) {
2391 			if (new_entry->params.tb_id > last->params.tb_id)
2392 				break;
2393 			fib_entry = last;
2394 		}
2395 
2396 		if (fib_entry)
2397 			list_add(&new_entry->list, &fib_entry->list);
2398 		else
2399 			list_add(&new_entry->list, &fib_node->entry_list);
2400 	}
2401 
2402 	return 0;
2403 }
2404 
2405 static void
2406 mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib_entry *fib_entry)
2407 {
2408 	list_del(&fib_entry->list);
2409 }
2410 
2411 static int
2412 mlxsw_sp_fib4_node_entry_add(struct mlxsw_sp *mlxsw_sp,
2413 			     const struct mlxsw_sp_fib_node *fib_node,
2414 			     struct mlxsw_sp_fib_entry *fib_entry)
2415 {
2416 	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
2417 		return 0;
2418 
2419 	/* To prevent packet loss, overwrite the previously offloaded
2420 	 * entry.
2421 	 */
2422 	if (!list_is_singular(&fib_node->entry_list)) {
2423 		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
2424 		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
2425 
2426 		mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
2427 	}
2428 
2429 	return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
2430 }
2431 
2432 static void
2433 mlxsw_sp_fib4_node_entry_del(struct mlxsw_sp *mlxsw_sp,
2434 			     const struct mlxsw_sp_fib_node *fib_node,
2435 			     struct mlxsw_sp_fib_entry *fib_entry)
2436 {
2437 	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
2438 		return;
2439 
2440 	/* Promote the next entry by overwriting the deleted entry */
2441 	if (!list_is_singular(&fib_node->entry_list)) {
2442 		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
2443 		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
2444 
2445 		mlxsw_sp_fib_entry_update(mlxsw_sp, n);
2446 		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
2447 		return;
2448 	}
2449 
2450 	mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
2451 }
2452 
2453 static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
2454 					 struct mlxsw_sp_fib_entry *fib_entry,
2455 					 bool replace, bool append)
2456 {
2457 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
2458 	int err;
2459 
2460 	err = mlxsw_sp_fib4_node_list_insert(fib_node, fib_entry, replace,
2461 					     append);
2462 	if (err)
2463 		return err;
2464 
2465 	err = mlxsw_sp_fib4_node_entry_add(mlxsw_sp, fib_node, fib_entry);
2466 	if (err)
2467 		goto err_fib4_node_entry_add;
2468 
2469 	return 0;
2470 
2471 err_fib4_node_entry_add:
2472 	mlxsw_sp_fib4_node_list_remove(fib_entry);
2473 	return err;
2474 }
2475 
2476 static void
2477 mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
2478 				struct mlxsw_sp_fib_entry *fib_entry)
2479 {
2480 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
2481 
2482 	mlxsw_sp_fib4_node_entry_del(mlxsw_sp, fib_node, fib_entry);
2483 	mlxsw_sp_fib4_node_list_remove(fib_entry);
2484 }
2485 
2486 static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
2487 					struct mlxsw_sp_fib_entry *fib_entry,
2488 					bool replace)
2489 {
2490 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
2491 	struct mlxsw_sp_fib_entry *replaced;
2492 
2493 	if (!replace)
2494 		return;
2495 
2496 	/* We inserted the new entry before replaced one */
2497 	replaced = list_next_entry(fib_entry, list);
2498 
2499 	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
2500 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
2501 	mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
2502 }
2503 
2504 static int
2505 mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
2506 			 const struct fib_entry_notifier_info *fen_info,
2507 			 bool replace, bool append)
2508 {
2509 	struct mlxsw_sp_fib_entry *fib_entry;
2510 	struct mlxsw_sp_fib_node *fib_node;
2511 	int err;
2512 
2513 	if (mlxsw_sp->router.aborted)
2514 		return 0;
2515 
2516 	fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info);
2517 	if (IS_ERR(fib_node)) {
2518 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
2519 		return PTR_ERR(fib_node);
2520 	}
2521 
2522 	fib_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
2523 	if (IS_ERR(fib_entry)) {
2524 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
2525 		err = PTR_ERR(fib_entry);
2526 		goto err_fib4_entry_create;
2527 	}
2528 
2529 	err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib_entry, replace,
2530 					    append);
2531 	if (err) {
2532 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
2533 		goto err_fib4_node_entry_link;
2534 	}
2535 
2536 	mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib_entry, replace);
2537 
2538 	return 0;
2539 
2540 err_fib4_node_entry_link:
2541 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
2542 err_fib4_entry_create:
2543 	mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
2544 	return err;
2545 }
2546 
2547 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
2548 				     struct fib_entry_notifier_info *fen_info)
2549 {
2550 	struct mlxsw_sp_fib_entry *fib_entry;
2551 	struct mlxsw_sp_fib_node *fib_node;
2552 
2553 	if (mlxsw_sp->router.aborted)
2554 		return;
2555 
2556 	fib_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
2557 	if (WARN_ON(!fib_entry))
2558 		return;
2559 	fib_node = fib_entry->fib_node;
2560 
2561 	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry);
2562 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
2563 	mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
2564 }
2565 
2566 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
2567 {
2568 	char ralta_pl[MLXSW_REG_RALTA_LEN];
2569 	char ralst_pl[MLXSW_REG_RALST_LEN];
2570 	int i, err;
2571 
2572 	mlxsw_reg_ralta_pack(ralta_pl, true, MLXSW_REG_RALXX_PROTOCOL_IPV4,
2573 			     MLXSW_SP_LPM_TREE_MIN);
2574 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
2575 	if (err)
2576 		return err;
2577 
2578 	mlxsw_reg_ralst_pack(ralst_pl, 0xff, MLXSW_SP_LPM_TREE_MIN);
2579 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
2580 	if (err)
2581 		return err;
2582 
2583 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
2584 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router.vrs[i];
2585 		char raltb_pl[MLXSW_REG_RALTB_LEN];
2586 		char ralue_pl[MLXSW_REG_RALUE_LEN];
2587 
2588 		if (!mlxsw_sp_vr_is_used(vr))
2589 			continue;
2590 
2591 		mlxsw_reg_raltb_pack(raltb_pl, vr->id,
2592 				     MLXSW_REG_RALXX_PROTOCOL_IPV4,
2593 				     MLXSW_SP_LPM_TREE_MIN);
2594 		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
2595 				      raltb_pl);
2596 		if (err)
2597 			return err;
2598 
2599 		mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4,
2600 				      MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0,
2601 				      0);
2602 		mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
2603 		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
2604 				      ralue_pl);
2605 		if (err)
2606 			return err;
2607 	}
2608 
2609 	return 0;
2610 }
2611 
2612 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
2613 				     struct mlxsw_sp_fib_node *fib_node)
2614 {
2615 	struct mlxsw_sp_fib_entry *fib_entry, *tmp;
2616 
2617 	list_for_each_entry_safe(fib_entry, tmp, &fib_node->entry_list, list) {
2618 		bool do_break = &tmp->list == &fib_node->entry_list;
2619 
2620 		mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry);
2621 		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
2622 		mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
2623 		/* Break when entry list is empty and node was freed.
2624 		 * Otherwise, we'll access freed memory in the next
2625 		 * iteration.
2626 		 */
2627 		if (do_break)
2628 			break;
2629 	}
2630 }
2631 
2632 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
2633 				    struct mlxsw_sp_fib_node *fib_node)
2634 {
2635 	switch (fib_node->fib->proto) {
2636 	case MLXSW_SP_L3_PROTO_IPV4:
2637 		mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
2638 		break;
2639 	case MLXSW_SP_L3_PROTO_IPV6:
2640 		WARN_ON_ONCE(1);
2641 		break;
2642 	}
2643 }
2644 
2645 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
2646 				  struct mlxsw_sp_vr *vr,
2647 				  enum mlxsw_sp_l3proto proto)
2648 {
2649 	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
2650 	struct mlxsw_sp_fib_node *fib_node, *tmp;
2651 
2652 	list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
2653 		bool do_break = &tmp->list == &fib->node_list;
2654 
2655 		mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
2656 		if (do_break)
2657 			break;
2658 	}
2659 }
2660 
2661 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
2662 {
2663 	int i;
2664 
2665 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
2666 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router.vrs[i];
2667 
2668 		if (!mlxsw_sp_vr_is_used(vr))
2669 			continue;
2670 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
2671 	}
2672 }
2673 
2674 static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp)
2675 {
2676 	int err;
2677 
2678 	if (mlxsw_sp->router.aborted)
2679 		return;
2680 	dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
2681 	mlxsw_sp_router_fib_flush(mlxsw_sp);
2682 	mlxsw_sp->router.aborted = true;
2683 	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
2684 	if (err)
2685 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
2686 }
2687 
2688 struct mlxsw_sp_fib_event_work {
2689 	struct work_struct work;
2690 	union {
2691 		struct fib_entry_notifier_info fen_info;
2692 		struct fib_rule_notifier_info fr_info;
2693 		struct fib_nh_notifier_info fnh_info;
2694 	};
2695 	struct mlxsw_sp *mlxsw_sp;
2696 	unsigned long event;
2697 };
2698 
2699 static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
2700 {
2701 	struct mlxsw_sp_fib_event_work *fib_work =
2702 		container_of(work, struct mlxsw_sp_fib_event_work, work);
2703 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
2704 	struct fib_rule *rule;
2705 	bool replace, append;
2706 	int err;
2707 
2708 	/* Protect internal structures from changes */
2709 	rtnl_lock();
2710 	switch (fib_work->event) {
2711 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
2712 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
2713 	case FIB_EVENT_ENTRY_ADD:
2714 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
2715 		append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
2716 		err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
2717 					       replace, append);
2718 		if (err)
2719 			mlxsw_sp_router_fib4_abort(mlxsw_sp);
2720 		fib_info_put(fib_work->fen_info.fi);
2721 		break;
2722 	case FIB_EVENT_ENTRY_DEL:
2723 		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
2724 		fib_info_put(fib_work->fen_info.fi);
2725 		break;
2726 	case FIB_EVENT_RULE_ADD: /* fall through */
2727 	case FIB_EVENT_RULE_DEL:
2728 		rule = fib_work->fr_info.rule;
2729 		if (!fib4_rule_default(rule) && !rule->l3mdev)
2730 			mlxsw_sp_router_fib4_abort(mlxsw_sp);
2731 		fib_rule_put(rule);
2732 		break;
2733 	case FIB_EVENT_NH_ADD: /* fall through */
2734 	case FIB_EVENT_NH_DEL:
2735 		mlxsw_sp_nexthop_event(mlxsw_sp, fib_work->event,
2736 				       fib_work->fnh_info.fib_nh);
2737 		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
2738 		break;
2739 	}
2740 	rtnl_unlock();
2741 	kfree(fib_work);
2742 }
2743 
2744 /* Called with rcu_read_lock() */
2745 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
2746 				     unsigned long event, void *ptr)
2747 {
2748 	struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
2749 	struct mlxsw_sp_fib_event_work *fib_work;
2750 	struct fib_notifier_info *info = ptr;
2751 
2752 	if (!net_eq(info->net, &init_net))
2753 		return NOTIFY_DONE;
2754 
2755 	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
2756 	if (WARN_ON(!fib_work))
2757 		return NOTIFY_BAD;
2758 
2759 	INIT_WORK(&fib_work->work, mlxsw_sp_router_fib_event_work);
2760 	fib_work->mlxsw_sp = mlxsw_sp;
2761 	fib_work->event = event;
2762 
2763 	switch (event) {
2764 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
2765 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
2766 	case FIB_EVENT_ENTRY_ADD: /* fall through */
2767 	case FIB_EVENT_ENTRY_DEL:
2768 		memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info));
2769 		/* Take referece on fib_info to prevent it from being
2770 		 * freed while work is queued. Release it afterwards.
2771 		 */
2772 		fib_info_hold(fib_work->fen_info.fi);
2773 		break;
2774 	case FIB_EVENT_RULE_ADD: /* fall through */
2775 	case FIB_EVENT_RULE_DEL:
2776 		memcpy(&fib_work->fr_info, ptr, sizeof(fib_work->fr_info));
2777 		fib_rule_get(fib_work->fr_info.rule);
2778 		break;
2779 	case FIB_EVENT_NH_ADD: /* fall through */
2780 	case FIB_EVENT_NH_DEL:
2781 		memcpy(&fib_work->fnh_info, ptr, sizeof(fib_work->fnh_info));
2782 		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
2783 		break;
2784 	}
2785 
2786 	mlxsw_core_schedule_work(&fib_work->work);
2787 
2788 	return NOTIFY_DONE;
2789 }
2790 
2791 static struct mlxsw_sp_rif *
2792 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
2793 			 const struct net_device *dev)
2794 {
2795 	int i;
2796 
2797 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
2798 		if (mlxsw_sp->rifs[i] && mlxsw_sp->rifs[i]->dev == dev)
2799 			return mlxsw_sp->rifs[i];
2800 
2801 	return NULL;
2802 }
2803 
2804 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
2805 {
2806 	char ritr_pl[MLXSW_REG_RITR_LEN];
2807 	int err;
2808 
2809 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
2810 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
2811 	if (WARN_ON_ONCE(err))
2812 		return err;
2813 
2814 	mlxsw_reg_ritr_enable_set(ritr_pl, false);
2815 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
2816 }
2817 
2818 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2819 					  struct mlxsw_sp_rif *rif)
2820 {
2821 	mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
2822 	mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
2823 	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
2824 }
2825 
2826 static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif,
2827 				       const struct in_device *in_dev,
2828 				       unsigned long event)
2829 {
2830 	switch (event) {
2831 	case NETDEV_UP:
2832 		if (!rif)
2833 			return true;
2834 		return false;
2835 	case NETDEV_DOWN:
2836 		if (rif && !in_dev->ifa_list &&
2837 		    !netif_is_l3_slave(rif->dev))
2838 			return true;
2839 		/* It is possible we already removed the RIF ourselves
2840 		 * if it was assigned to a netdev that is now a bridge
2841 		 * or LAG slave.
2842 		 */
2843 		return false;
2844 	}
2845 
2846 	return false;
2847 }
2848 
2849 #define MLXSW_SP_INVALID_INDEX_RIF 0xffff
2850 static int mlxsw_sp_avail_rif_get(struct mlxsw_sp *mlxsw_sp)
2851 {
2852 	int i;
2853 
2854 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
2855 		if (!mlxsw_sp->rifs[i])
2856 			return i;
2857 
2858 	return MLXSW_SP_INVALID_INDEX_RIF;
2859 }
2860 
2861 static void mlxsw_sp_vport_rif_sp_attr_get(struct mlxsw_sp_port *mlxsw_sp_vport,
2862 					   bool *p_lagged, u16 *p_system_port)
2863 {
2864 	u8 local_port = mlxsw_sp_vport->local_port;
2865 
2866 	*p_lagged = mlxsw_sp_vport->lagged;
2867 	*p_system_port = *p_lagged ? mlxsw_sp_vport->lag_id : local_port;
2868 }
2869 
2870 static int mlxsw_sp_vport_rif_sp_op(struct mlxsw_sp_port *mlxsw_sp_vport,
2871 				    u16 vr_id, struct net_device *l3_dev,
2872 				    u16 rif_index, bool create)
2873 {
2874 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
2875 	bool lagged = mlxsw_sp_vport->lagged;
2876 	char ritr_pl[MLXSW_REG_RITR_LEN];
2877 	u16 system_port;
2878 
2879 	mlxsw_reg_ritr_pack(ritr_pl, create, MLXSW_REG_RITR_SP_IF, rif_index,
2880 			    vr_id, l3_dev->mtu, l3_dev->dev_addr);
2881 
2882 	mlxsw_sp_vport_rif_sp_attr_get(mlxsw_sp_vport, &lagged, &system_port);
2883 	mlxsw_reg_ritr_sp_if_pack(ritr_pl, lagged, system_port,
2884 				  mlxsw_sp_vport_vid_get(mlxsw_sp_vport));
2885 
2886 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
2887 }
2888 
2889 static void mlxsw_sp_vport_rif_sp_leave(struct mlxsw_sp_port *mlxsw_sp_vport);
2890 
2891 static u16 mlxsw_sp_rif_sp_to_fid(u16 rif_index)
2892 {
2893 	return MLXSW_SP_RFID_BASE + rif_index;
2894 }
2895 
2896 static struct mlxsw_sp_fid *
2897 mlxsw_sp_rfid_alloc(u16 fid, struct net_device *l3_dev)
2898 {
2899 	struct mlxsw_sp_fid *f;
2900 
2901 	f = kzalloc(sizeof(*f), GFP_KERNEL);
2902 	if (!f)
2903 		return NULL;
2904 
2905 	f->leave = mlxsw_sp_vport_rif_sp_leave;
2906 	f->ref_count = 0;
2907 	f->dev = l3_dev;
2908 	f->fid = fid;
2909 
2910 	return f;
2911 }
2912 
2913 static struct mlxsw_sp_rif *
2914 mlxsw_sp_rif_alloc(u16 rif_index, u16 vr_id, struct net_device *l3_dev,
2915 		   struct mlxsw_sp_fid *f)
2916 {
2917 	struct mlxsw_sp_rif *rif;
2918 
2919 	rif = kzalloc(sizeof(*rif), GFP_KERNEL);
2920 	if (!rif)
2921 		return NULL;
2922 
2923 	INIT_LIST_HEAD(&rif->nexthop_list);
2924 	INIT_LIST_HEAD(&rif->neigh_list);
2925 	ether_addr_copy(rif->addr, l3_dev->dev_addr);
2926 	rif->mtu = l3_dev->mtu;
2927 	rif->vr_id = vr_id;
2928 	rif->dev = l3_dev;
2929 	rif->rif_index = rif_index;
2930 	rif->f = f;
2931 
2932 	return rif;
2933 }
2934 
2935 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
2936 {
2937 	return rif->rif_index;
2938 }
2939 
2940 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
2941 {
2942 	return rif->dev->ifindex;
2943 }
2944 
2945 static struct mlxsw_sp_rif *
2946 mlxsw_sp_vport_rif_sp_create(struct mlxsw_sp_port *mlxsw_sp_vport,
2947 			     struct net_device *l3_dev)
2948 {
2949 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
2950 	u32 tb_id = l3mdev_fib_table(l3_dev);
2951 	struct mlxsw_sp_vr *vr;
2952 	struct mlxsw_sp_fid *f;
2953 	struct mlxsw_sp_rif *rif;
2954 	u16 fid, rif_index;
2955 	int err;
2956 
2957 	rif_index = mlxsw_sp_avail_rif_get(mlxsw_sp);
2958 	if (rif_index == MLXSW_SP_INVALID_INDEX_RIF)
2959 		return ERR_PTR(-ERANGE);
2960 
2961 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN);
2962 	if (IS_ERR(vr))
2963 		return ERR_CAST(vr);
2964 
2965 	err = mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, vr->id, l3_dev,
2966 				       rif_index, true);
2967 	if (err)
2968 		goto err_vport_rif_sp_op;
2969 
2970 	fid = mlxsw_sp_rif_sp_to_fid(rif_index);
2971 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, true);
2972 	if (err)
2973 		goto err_rif_fdb_op;
2974 
2975 	f = mlxsw_sp_rfid_alloc(fid, l3_dev);
2976 	if (!f) {
2977 		err = -ENOMEM;
2978 		goto err_rfid_alloc;
2979 	}
2980 
2981 	rif = mlxsw_sp_rif_alloc(rif_index, vr->id, l3_dev, f);
2982 	if (!rif) {
2983 		err = -ENOMEM;
2984 		goto err_rif_alloc;
2985 	}
2986 
2987 	if (devlink_dpipe_table_counter_enabled(priv_to_devlink(mlxsw_sp->core),
2988 						MLXSW_SP_DPIPE_TABLE_NAME_ERIF)) {
2989 		err = mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif,
2990 						 MLXSW_SP_RIF_COUNTER_EGRESS);
2991 		if (err)
2992 			netdev_dbg(mlxsw_sp_vport->dev,
2993 				   "Counter alloc Failed err=%d\n", err);
2994 	}
2995 
2996 	f->rif = rif;
2997 	mlxsw_sp->rifs[rif_index] = rif;
2998 	vr->rif_count++;
2999 
3000 	return rif;
3001 
3002 err_rif_alloc:
3003 	kfree(f);
3004 err_rfid_alloc:
3005 	mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, false);
3006 err_rif_fdb_op:
3007 	mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, vr->id, l3_dev, rif_index,
3008 				 false);
3009 err_vport_rif_sp_op:
3010 	mlxsw_sp_vr_put(vr);
3011 	return ERR_PTR(err);
3012 }
3013 
3014 static void mlxsw_sp_vport_rif_sp_destroy(struct mlxsw_sp_port *mlxsw_sp_vport,
3015 					  struct mlxsw_sp_rif *rif)
3016 {
3017 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
3018 	struct mlxsw_sp_vr *vr = &mlxsw_sp->router.vrs[rif->vr_id];
3019 	struct net_device *l3_dev = rif->dev;
3020 	struct mlxsw_sp_fid *f = rif->f;
3021 	u16 rif_index = rif->rif_index;
3022 	u16 fid = f->fid;
3023 
3024 	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
3025 
3026 	mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
3027 	mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_INGRESS);
3028 
3029 	vr->rif_count--;
3030 	mlxsw_sp->rifs[rif_index] = NULL;
3031 	f->rif = NULL;
3032 
3033 	kfree(rif);
3034 
3035 	kfree(f);
3036 
3037 	mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, false);
3038 
3039 	mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, vr->id, l3_dev, rif_index,
3040 				 false);
3041 	mlxsw_sp_vr_put(vr);
3042 }
3043 
3044 static int mlxsw_sp_vport_rif_sp_join(struct mlxsw_sp_port *mlxsw_sp_vport,
3045 				      struct net_device *l3_dev)
3046 {
3047 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
3048 	struct mlxsw_sp_rif *rif;
3049 
3050 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
3051 	if (!rif) {
3052 		rif = mlxsw_sp_vport_rif_sp_create(mlxsw_sp_vport, l3_dev);
3053 		if (IS_ERR(rif))
3054 			return PTR_ERR(rif);
3055 	}
3056 
3057 	mlxsw_sp_vport_fid_set(mlxsw_sp_vport, rif->f);
3058 	rif->f->ref_count++;
3059 
3060 	netdev_dbg(mlxsw_sp_vport->dev, "Joined FID=%d\n", rif->f->fid);
3061 
3062 	return 0;
3063 }
3064 
3065 static void mlxsw_sp_vport_rif_sp_leave(struct mlxsw_sp_port *mlxsw_sp_vport)
3066 {
3067 	struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport);
3068 
3069 	netdev_dbg(mlxsw_sp_vport->dev, "Left FID=%d\n", f->fid);
3070 
3071 	mlxsw_sp_vport_fid_set(mlxsw_sp_vport, NULL);
3072 	if (--f->ref_count == 0)
3073 		mlxsw_sp_vport_rif_sp_destroy(mlxsw_sp_vport, f->rif);
3074 }
3075 
3076 static int mlxsw_sp_inetaddr_vport_event(struct net_device *l3_dev,
3077 					 struct net_device *port_dev,
3078 					 unsigned long event, u16 vid)
3079 {
3080 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
3081 	struct mlxsw_sp_port *mlxsw_sp_vport;
3082 
3083 	mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid);
3084 	if (WARN_ON(!mlxsw_sp_vport))
3085 		return -EINVAL;
3086 
3087 	switch (event) {
3088 	case NETDEV_UP:
3089 		return mlxsw_sp_vport_rif_sp_join(mlxsw_sp_vport, l3_dev);
3090 	case NETDEV_DOWN:
3091 		mlxsw_sp_vport_rif_sp_leave(mlxsw_sp_vport);
3092 		break;
3093 	}
3094 
3095 	return 0;
3096 }
3097 
3098 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
3099 					unsigned long event)
3100 {
3101 	if (netif_is_bridge_port(port_dev) ||
3102 	    netif_is_lag_port(port_dev) ||
3103 	    netif_is_ovs_port(port_dev))
3104 		return 0;
3105 
3106 	return mlxsw_sp_inetaddr_vport_event(port_dev, port_dev, event, 1);
3107 }
3108 
3109 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
3110 					 struct net_device *lag_dev,
3111 					 unsigned long event, u16 vid)
3112 {
3113 	struct net_device *port_dev;
3114 	struct list_head *iter;
3115 	int err;
3116 
3117 	netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
3118 		if (mlxsw_sp_port_dev_check(port_dev)) {
3119 			err = mlxsw_sp_inetaddr_vport_event(l3_dev, port_dev,
3120 							    event, vid);
3121 			if (err)
3122 				return err;
3123 		}
3124 	}
3125 
3126 	return 0;
3127 }
3128 
3129 static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
3130 				       unsigned long event)
3131 {
3132 	if (netif_is_bridge_port(lag_dev))
3133 		return 0;
3134 
3135 	return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1);
3136 }
3137 
3138 static struct mlxsw_sp_fid *mlxsw_sp_bridge_fid_get(struct mlxsw_sp *mlxsw_sp,
3139 						    struct net_device *l3_dev)
3140 {
3141 	u16 fid;
3142 
3143 	if (is_vlan_dev(l3_dev))
3144 		fid = vlan_dev_vlan_id(l3_dev);
3145 	else if (mlxsw_sp->master_bridge.dev == l3_dev)
3146 		fid = 1;
3147 	else
3148 		return mlxsw_sp_vfid_find(mlxsw_sp, l3_dev);
3149 
3150 	return mlxsw_sp_fid_find(mlxsw_sp, fid);
3151 }
3152 
3153 static u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
3154 {
3155 	return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
3156 }
3157 
3158 static enum mlxsw_flood_table_type mlxsw_sp_flood_table_type_get(u16 fid)
3159 {
3160 	return mlxsw_sp_fid_is_vfid(fid) ? MLXSW_REG_SFGC_TABLE_TYPE_FID :
3161 	       MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST;
3162 }
3163 
3164 static u16 mlxsw_sp_flood_table_index_get(u16 fid)
3165 {
3166 	return mlxsw_sp_fid_is_vfid(fid) ? mlxsw_sp_fid_to_vfid(fid) : fid;
3167 }
3168 
3169 static int mlxsw_sp_router_port_flood_set(struct mlxsw_sp *mlxsw_sp, u16 fid,
3170 					  bool set)
3171 {
3172 	u8 router_port = mlxsw_sp_router_port(mlxsw_sp);
3173 	enum mlxsw_flood_table_type table_type;
3174 	char *sftr_pl;
3175 	u16 index;
3176 	int err;
3177 
3178 	sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL);
3179 	if (!sftr_pl)
3180 		return -ENOMEM;
3181 
3182 	table_type = mlxsw_sp_flood_table_type_get(fid);
3183 	index = mlxsw_sp_flood_table_index_get(fid);
3184 	mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_BC, index, table_type,
3185 			    1, router_port, set);
3186 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl);
3187 
3188 	kfree(sftr_pl);
3189 	return err;
3190 }
3191 
3192 static enum mlxsw_reg_ritr_if_type mlxsw_sp_rif_type_get(u16 fid)
3193 {
3194 	if (mlxsw_sp_fid_is_vfid(fid))
3195 		return MLXSW_REG_RITR_FID_IF;
3196 	else
3197 		return MLXSW_REG_RITR_VLAN_IF;
3198 }
3199 
3200 static int mlxsw_sp_rif_bridge_op(struct mlxsw_sp *mlxsw_sp, u16 vr_id,
3201 				  struct net_device *l3_dev,
3202 				  u16 fid, u16 rif,
3203 				  bool create)
3204 {
3205 	enum mlxsw_reg_ritr_if_type rif_type;
3206 	char ritr_pl[MLXSW_REG_RITR_LEN];
3207 
3208 	rif_type = mlxsw_sp_rif_type_get(fid);
3209 	mlxsw_reg_ritr_pack(ritr_pl, create, rif_type, rif, vr_id, l3_dev->mtu,
3210 			    l3_dev->dev_addr);
3211 	mlxsw_reg_ritr_fid_set(ritr_pl, rif_type, fid);
3212 
3213 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
3214 }
3215 
3216 static int mlxsw_sp_rif_bridge_create(struct mlxsw_sp *mlxsw_sp,
3217 				      struct net_device *l3_dev,
3218 				      struct mlxsw_sp_fid *f)
3219 {
3220 	u32 tb_id = l3mdev_fib_table(l3_dev);
3221 	struct mlxsw_sp_rif *rif;
3222 	struct mlxsw_sp_vr *vr;
3223 	u16 rif_index;
3224 	int err;
3225 
3226 	rif_index = mlxsw_sp_avail_rif_get(mlxsw_sp);
3227 	if (rif_index == MLXSW_SP_INVALID_INDEX_RIF)
3228 		return -ERANGE;
3229 
3230 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN);
3231 	if (IS_ERR(vr))
3232 		return PTR_ERR(vr);
3233 
3234 	err = mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, true);
3235 	if (err)
3236 		goto err_port_flood_set;
3237 
3238 	err = mlxsw_sp_rif_bridge_op(mlxsw_sp, vr->id, l3_dev, f->fid,
3239 				     rif_index, true);
3240 	if (err)
3241 		goto err_rif_bridge_op;
3242 
3243 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, true);
3244 	if (err)
3245 		goto err_rif_fdb_op;
3246 
3247 	rif = mlxsw_sp_rif_alloc(rif_index, vr->id, l3_dev, f);
3248 	if (!rif) {
3249 		err = -ENOMEM;
3250 		goto err_rif_alloc;
3251 	}
3252 
3253 	f->rif = rif;
3254 	mlxsw_sp->rifs[rif_index] = rif;
3255 	vr->rif_count++;
3256 
3257 	netdev_dbg(l3_dev, "RIF=%d created\n", rif_index);
3258 
3259 	return 0;
3260 
3261 err_rif_alloc:
3262 	mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false);
3263 err_rif_fdb_op:
3264 	mlxsw_sp_rif_bridge_op(mlxsw_sp, vr->id, l3_dev, f->fid, rif_index,
3265 			       false);
3266 err_rif_bridge_op:
3267 	mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false);
3268 err_port_flood_set:
3269 	mlxsw_sp_vr_put(vr);
3270 	return err;
3271 }
3272 
3273 void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp,
3274 				 struct mlxsw_sp_rif *rif)
3275 {
3276 	struct mlxsw_sp_vr *vr = &mlxsw_sp->router.vrs[rif->vr_id];
3277 	struct net_device *l3_dev = rif->dev;
3278 	struct mlxsw_sp_fid *f = rif->f;
3279 	u16 rif_index = rif->rif_index;
3280 
3281 	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
3282 
3283 	vr->rif_count--;
3284 	mlxsw_sp->rifs[rif_index] = NULL;
3285 	f->rif = NULL;
3286 
3287 	kfree(rif);
3288 
3289 	mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false);
3290 
3291 	mlxsw_sp_rif_bridge_op(mlxsw_sp, vr->id, l3_dev, f->fid, rif_index,
3292 			       false);
3293 
3294 	mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false);
3295 
3296 	mlxsw_sp_vr_put(vr);
3297 
3298 	netdev_dbg(l3_dev, "RIF=%d destroyed\n", rif_index);
3299 }
3300 
3301 static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
3302 					  struct net_device *br_dev,
3303 					  unsigned long event)
3304 {
3305 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
3306 	struct mlxsw_sp_fid *f;
3307 
3308 	/* FID can either be an actual FID if the L3 device is the
3309 	 * VLAN-aware bridge or a VLAN device on top. Otherwise, the
3310 	 * L3 device is a VLAN-unaware bridge and we get a vFID.
3311 	 */
3312 	f = mlxsw_sp_bridge_fid_get(mlxsw_sp, l3_dev);
3313 	if (WARN_ON(!f))
3314 		return -EINVAL;
3315 
3316 	switch (event) {
3317 	case NETDEV_UP:
3318 		return mlxsw_sp_rif_bridge_create(mlxsw_sp, l3_dev, f);
3319 	case NETDEV_DOWN:
3320 		mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->rif);
3321 		break;
3322 	}
3323 
3324 	return 0;
3325 }
3326 
3327 static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
3328 					unsigned long event)
3329 {
3330 	struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
3331 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(vlan_dev);
3332 	u16 vid = vlan_dev_vlan_id(vlan_dev);
3333 
3334 	if (mlxsw_sp_port_dev_check(real_dev))
3335 		return mlxsw_sp_inetaddr_vport_event(vlan_dev, real_dev, event,
3336 						     vid);
3337 	else if (netif_is_lag_master(real_dev))
3338 		return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
3339 						     vid);
3340 	else if (netif_is_bridge_master(real_dev) &&
3341 		 mlxsw_sp->master_bridge.dev == real_dev)
3342 		return mlxsw_sp_inetaddr_bridge_event(vlan_dev, real_dev,
3343 						      event);
3344 
3345 	return 0;
3346 }
3347 
3348 static int __mlxsw_sp_inetaddr_event(struct net_device *dev,
3349 				     unsigned long event)
3350 {
3351 	if (mlxsw_sp_port_dev_check(dev))
3352 		return mlxsw_sp_inetaddr_port_event(dev, event);
3353 	else if (netif_is_lag_master(dev))
3354 		return mlxsw_sp_inetaddr_lag_event(dev, event);
3355 	else if (netif_is_bridge_master(dev))
3356 		return mlxsw_sp_inetaddr_bridge_event(dev, dev, event);
3357 	else if (is_vlan_dev(dev))
3358 		return mlxsw_sp_inetaddr_vlan_event(dev, event);
3359 	else
3360 		return 0;
3361 }
3362 
3363 int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
3364 			    unsigned long event, void *ptr)
3365 {
3366 	struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
3367 	struct net_device *dev = ifa->ifa_dev->dev;
3368 	struct mlxsw_sp *mlxsw_sp;
3369 	struct mlxsw_sp_rif *rif;
3370 	int err = 0;
3371 
3372 	mlxsw_sp = mlxsw_sp_lower_get(dev);
3373 	if (!mlxsw_sp)
3374 		goto out;
3375 
3376 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
3377 	if (!mlxsw_sp_rif_should_config(rif, ifa->ifa_dev, event))
3378 		goto out;
3379 
3380 	err = __mlxsw_sp_inetaddr_event(dev, event);
3381 out:
3382 	return notifier_from_errno(err);
3383 }
3384 
3385 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
3386 			     const char *mac, int mtu)
3387 {
3388 	char ritr_pl[MLXSW_REG_RITR_LEN];
3389 	int err;
3390 
3391 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
3392 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
3393 	if (err)
3394 		return err;
3395 
3396 	mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
3397 	mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
3398 	mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
3399 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
3400 }
3401 
3402 int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
3403 {
3404 	struct mlxsw_sp *mlxsw_sp;
3405 	struct mlxsw_sp_rif *rif;
3406 	int err;
3407 
3408 	mlxsw_sp = mlxsw_sp_lower_get(dev);
3409 	if (!mlxsw_sp)
3410 		return 0;
3411 
3412 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
3413 	if (!rif)
3414 		return 0;
3415 
3416 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, rif->f->fid, false);
3417 	if (err)
3418 		return err;
3419 
3420 	err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
3421 				dev->mtu);
3422 	if (err)
3423 		goto err_rif_edit;
3424 
3425 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, rif->f->fid, true);
3426 	if (err)
3427 		goto err_rif_fdb_op;
3428 
3429 	ether_addr_copy(rif->addr, dev->dev_addr);
3430 	rif->mtu = dev->mtu;
3431 
3432 	netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
3433 
3434 	return 0;
3435 
3436 err_rif_fdb_op:
3437 	mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
3438 err_rif_edit:
3439 	mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, rif->f->fid, true);
3440 	return err;
3441 }
3442 
3443 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
3444 				  struct net_device *l3_dev)
3445 {
3446 	struct mlxsw_sp_rif *rif;
3447 
3448 	/* If netdev is already associated with a RIF, then we need to
3449 	 * destroy it and create a new one with the new virtual router ID.
3450 	 */
3451 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
3452 	if (rif)
3453 		__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN);
3454 
3455 	return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP);
3456 }
3457 
3458 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
3459 				    struct net_device *l3_dev)
3460 {
3461 	struct mlxsw_sp_rif *rif;
3462 
3463 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
3464 	if (!rif)
3465 		return;
3466 	__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN);
3467 }
3468 
3469 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
3470 				 struct netdev_notifier_changeupper_info *info)
3471 {
3472 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
3473 	int err = 0;
3474 
3475 	if (!mlxsw_sp)
3476 		return 0;
3477 
3478 	switch (event) {
3479 	case NETDEV_PRECHANGEUPPER:
3480 		return 0;
3481 	case NETDEV_CHANGEUPPER:
3482 		if (info->linking)
3483 			err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev);
3484 		else
3485 			mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
3486 		break;
3487 	}
3488 
3489 	return err;
3490 }
3491 
3492 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
3493 {
3494 	struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
3495 
3496 	/* Flush pending FIB notifications and then flush the device's
3497 	 * table before requesting another dump. The FIB notification
3498 	 * block is unregistered, so no need to take RTNL.
3499 	 */
3500 	mlxsw_core_flush_owq();
3501 	mlxsw_sp_router_fib_flush(mlxsw_sp);
3502 }
3503 
3504 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
3505 {
3506 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
3507 	u64 max_rifs;
3508 	int err;
3509 
3510 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
3511 		return -EIO;
3512 
3513 	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
3514 	mlxsw_sp->rifs = kcalloc(max_rifs, sizeof(struct mlxsw_sp_rif *),
3515 				 GFP_KERNEL);
3516 	if (!mlxsw_sp->rifs)
3517 		return -ENOMEM;
3518 
3519 	mlxsw_reg_rgcr_pack(rgcr_pl, true);
3520 	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
3521 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
3522 	if (err)
3523 		goto err_rgcr_fail;
3524 
3525 	return 0;
3526 
3527 err_rgcr_fail:
3528 	kfree(mlxsw_sp->rifs);
3529 	return err;
3530 }
3531 
3532 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
3533 {
3534 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
3535 	int i;
3536 
3537 	mlxsw_reg_rgcr_pack(rgcr_pl, false);
3538 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
3539 
3540 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
3541 		WARN_ON_ONCE(mlxsw_sp->rifs[i]);
3542 
3543 	kfree(mlxsw_sp->rifs);
3544 }
3545 
3546 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
3547 {
3548 	int err;
3549 
3550 	INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list);
3551 	err = __mlxsw_sp_router_init(mlxsw_sp);
3552 	if (err)
3553 		return err;
3554 
3555 	err = rhashtable_init(&mlxsw_sp->router.nexthop_ht,
3556 			      &mlxsw_sp_nexthop_ht_params);
3557 	if (err)
3558 		goto err_nexthop_ht_init;
3559 
3560 	err = rhashtable_init(&mlxsw_sp->router.nexthop_group_ht,
3561 			      &mlxsw_sp_nexthop_group_ht_params);
3562 	if (err)
3563 		goto err_nexthop_group_ht_init;
3564 
3565 	err = mlxsw_sp_lpm_init(mlxsw_sp);
3566 	if (err)
3567 		goto err_lpm_init;
3568 
3569 	err = mlxsw_sp_vrs_init(mlxsw_sp);
3570 	if (err)
3571 		goto err_vrs_init;
3572 
3573 	err = mlxsw_sp_neigh_init(mlxsw_sp);
3574 	if (err)
3575 		goto err_neigh_init;
3576 
3577 	mlxsw_sp->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
3578 	err = register_fib_notifier(&mlxsw_sp->fib_nb,
3579 				    mlxsw_sp_router_fib_dump_flush);
3580 	if (err)
3581 		goto err_register_fib_notifier;
3582 
3583 	return 0;
3584 
3585 err_register_fib_notifier:
3586 	mlxsw_sp_neigh_fini(mlxsw_sp);
3587 err_neigh_init:
3588 	mlxsw_sp_vrs_fini(mlxsw_sp);
3589 err_vrs_init:
3590 	mlxsw_sp_lpm_fini(mlxsw_sp);
3591 err_lpm_init:
3592 	rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht);
3593 err_nexthop_group_ht_init:
3594 	rhashtable_destroy(&mlxsw_sp->router.nexthop_ht);
3595 err_nexthop_ht_init:
3596 	__mlxsw_sp_router_fini(mlxsw_sp);
3597 	return err;
3598 }
3599 
3600 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
3601 {
3602 	unregister_fib_notifier(&mlxsw_sp->fib_nb);
3603 	mlxsw_sp_neigh_fini(mlxsw_sp);
3604 	mlxsw_sp_vrs_fini(mlxsw_sp);
3605 	mlxsw_sp_lpm_fini(mlxsw_sp);
3606 	rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht);
3607 	rhashtable_destroy(&mlxsw_sp->router.nexthop_ht);
3608 	__mlxsw_sp_router_fini(mlxsw_sp);
3609 }
3610