1 /*
2  * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3  * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
4  * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
5  * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
6  * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the names of the copyright holders nor the names of its
17  *    contributors may be used to endorse or promote products derived from
18  *    this software without specific prior written permission.
19  *
20  * Alternatively, this software may be distributed under the terms of the
21  * GNU General Public License ("GPL") version 2 as published by the Free
22  * Software Foundation.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 
37 #include <linux/kernel.h>
38 #include <linux/types.h>
39 #include <linux/rhashtable.h>
40 #include <linux/bitops.h>
41 #include <linux/in6.h>
42 #include <linux/notifier.h>
43 #include <linux/inetdevice.h>
44 #include <net/netevent.h>
45 #include <net/neighbour.h>
46 #include <net/arp.h>
47 #include <net/ip_fib.h>
48 
49 #include "spectrum.h"
50 #include "core.h"
51 #include "reg.h"
52 
/* Iterate over each prefix length whose bit is set in @prefix_usage. */
#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
55 
56 static bool
57 mlxsw_sp_prefix_usage_subset(struct mlxsw_sp_prefix_usage *prefix_usage1,
58 			     struct mlxsw_sp_prefix_usage *prefix_usage2)
59 {
60 	unsigned char prefix;
61 
62 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage1) {
63 		if (!test_bit(prefix, prefix_usage2->b))
64 			return false;
65 	}
66 	return true;
67 }
68 
69 static bool
70 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
71 			 struct mlxsw_sp_prefix_usage *prefix_usage2)
72 {
73 	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
74 }
75 
76 static bool
77 mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage)
78 {
79 	struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 } };
80 
81 	return mlxsw_sp_prefix_usage_eq(prefix_usage, &prefix_usage_none);
82 }
83 
/* Copy the whole prefix-usage mask from @prefix_usage2 into
 * @prefix_usage1 (both are the same fixed-size type).
 */
static void
mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
			  struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}
90 
/* Clear the entire prefix-usage mask. */
static void
mlxsw_sp_prefix_usage_zero(struct mlxsw_sp_prefix_usage *prefix_usage)
{
	memset(prefix_usage, 0, sizeof(*prefix_usage));
}

/* Mark @prefix_len as used in the mask. */
static void
mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
			  unsigned char prefix_len)
{
	set_bit(prefix_len, prefix_usage->b);
}

/* Mark @prefix_len as no longer used in the mask. */
static void
mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
			    unsigned char prefix_len)
{
	clear_bit(prefix_len, prefix_usage->b);
}
110 
/* Hash-table key of a FIB node: the prefix address (array sized for
 * IPv6, the larger of the two protocols) plus its prefix length.
 */
struct mlxsw_sp_fib_key {
	unsigned char addr[sizeof(struct in6_addr)];
	unsigned char prefix_len;
};

/* Hardware forwarding action type associated with a FIB entry. */
enum mlxsw_sp_fib_entry_type {
	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
};

struct mlxsw_sp_nexthop_group;

/* One prefix within a virtual router's FIB; holds the list of entries
 * that share this prefix.
 */
struct mlxsw_sp_fib_node {
	struct list_head entry_list;
	struct list_head list;
	struct rhash_head ht_node;
	struct mlxsw_sp_vr *vr;
	struct mlxsw_sp_fib_key key;
};

/* Route attributes distinguishing entries under the same FIB node. */
struct mlxsw_sp_fib_entry_params {
	u32 tb_id;	/* kernel routing table id */
	u32 prio;
	u8 tos;
	u8 type;
};

struct mlxsw_sp_fib_entry {
	struct list_head list;
	struct mlxsw_sp_fib_node *fib_node;
	enum mlxsw_sp_fib_entry_type type;
	struct list_head nexthop_group_node;
	struct mlxsw_sp_nexthop_group *nh_group;
	struct mlxsw_sp_fib_entry_params params;
	bool offloaded;
};

/* Per-virtual-router FIB: nodes are hashed for lookup and also linked
 * on node_list for iteration. prefix_ref_count / prefix_usage track
 * which prefix lengths are in use (drives LPM tree selection).
 */
struct mlxsw_sp_fib {
	struct rhashtable ht;
	struct list_head node_list;
	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
	struct mlxsw_sp_prefix_usage prefix_usage;
};

/* Defined later in the file. */
static const struct rhashtable_params mlxsw_sp_fib_ht_params;
157 
158 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(void)
159 {
160 	struct mlxsw_sp_fib *fib;
161 	int err;
162 
163 	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
164 	if (!fib)
165 		return ERR_PTR(-ENOMEM);
166 	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
167 	if (err)
168 		goto err_rhashtable_init;
169 	INIT_LIST_HEAD(&fib->node_list);
170 	return fib;
171 
172 err_rhashtable_init:
173 	kfree(fib);
174 	return ERR_PTR(err);
175 }
176 
/* Free a FIB instance; all nodes must have been removed beforehand. */
static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib)
{
	WARN_ON(!list_empty(&fib->node_list));
	rhashtable_destroy(&fib->ht);
	kfree(fib);
}
183 
184 static struct mlxsw_sp_lpm_tree *
185 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp, bool one_reserved)
186 {
187 	static struct mlxsw_sp_lpm_tree *lpm_tree;
188 	int i;
189 
190 	for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
191 		lpm_tree = &mlxsw_sp->router.lpm_trees[i];
192 		if (lpm_tree->ref_count == 0) {
193 			if (one_reserved)
194 				one_reserved = false;
195 			else
196 				return lpm_tree;
197 		}
198 	}
199 	return NULL;
200 }
201 
/* Enable (allocate) the LPM tree in hardware via the RALTA register. */
static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];

	mlxsw_reg_ralta_pack(ralta_pl, true,
			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
			     lpm_tree->id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}

/* Disable (free) the LPM tree in hardware via the RALTA register. */
static int mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];

	mlxsw_reg_ralta_pack(ralta_pl, false,
			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
			     lpm_tree->id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}
223 
/* Program the tree's bin structure via the RALST register: the highest
 * used prefix length becomes the root bin, and the remaining used
 * prefix lengths (except 0) are packed in ascending order, each one
 * recording the previously packed bin.
 */
static int
mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_prefix_usage *prefix_usage,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralst_pl[MLXSW_REG_RALST_LEN];
	u8 root_bin = 0;
	u8 prefix;
	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;

	/* Set bits are visited in ascending order, so root_bin ends up
	 * as the highest prefix length in use (0 if none are set).
	 */
	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
		root_bin = prefix;

	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
		if (prefix == 0)
			continue;
		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
					 MLXSW_REG_RALST_BIN_NO_CHILD);
		last_prefix = prefix;
	}
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
}
247 
/* Pick an unused LPM tree, enable it in hardware and program its bin
 * structure according to @prefix_usage. Returns ERR_PTR(-EBUSY) when
 * no free tree exists, or an ERR_PTR() of the hardware write error.
 */
static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
			 struct mlxsw_sp_prefix_usage *prefix_usage,
			 enum mlxsw_sp_l3proto proto, bool one_reserved)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int err;

	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp, one_reserved);
	if (!lpm_tree)
		return ERR_PTR(-EBUSY);
	lpm_tree->proto = proto;
	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
	if (err)
		return ERR_PTR(err);

	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
						lpm_tree);
	if (err)
		goto err_left_struct_set;
	/* Remember the usage the tree was built for, so it can be
	 * matched and shared by subsequent mlxsw_sp_lpm_tree_get() calls.
	 */
	memcpy(&lpm_tree->prefix_usage, prefix_usage,
	       sizeof(lpm_tree->prefix_usage));
	return lpm_tree;

err_left_struct_set:
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
	return ERR_PTR(err);
}

/* Tear down an LPM tree; currently just releases it in hardware. */
static int mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_lpm_tree *lpm_tree)
{
	return mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}
282 
283 static struct mlxsw_sp_lpm_tree *
284 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
285 		      struct mlxsw_sp_prefix_usage *prefix_usage,
286 		      enum mlxsw_sp_l3proto proto, bool one_reserved)
287 {
288 	struct mlxsw_sp_lpm_tree *lpm_tree;
289 	int i;
290 
291 	for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
292 		lpm_tree = &mlxsw_sp->router.lpm_trees[i];
293 		if (lpm_tree->ref_count != 0 &&
294 		    lpm_tree->proto == proto &&
295 		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
296 					     prefix_usage))
297 			goto inc_ref_count;
298 	}
299 	lpm_tree = mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage,
300 					    proto, one_reserved);
301 	if (IS_ERR(lpm_tree))
302 		return lpm_tree;
303 
304 inc_ref_count:
305 	lpm_tree->ref_count++;
306 	return lpm_tree;
307 }
308 
309 static int mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
310 				 struct mlxsw_sp_lpm_tree *lpm_tree)
311 {
312 	if (--lpm_tree->ref_count == 0)
313 		return mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
314 	return 0;
315 }
316 
317 static void mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
318 {
319 	struct mlxsw_sp_lpm_tree *lpm_tree;
320 	int i;
321 
322 	for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
323 		lpm_tree = &mlxsw_sp->router.lpm_trees[i];
324 		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
325 	}
326 }
327 
328 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
329 {
330 	struct mlxsw_sp_vr *vr;
331 	int i;
332 
333 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
334 		vr = &mlxsw_sp->router.vrs[i];
335 		if (!vr->used)
336 			return vr;
337 	}
338 	return NULL;
339 }
340 
/* Bind the virtual router to its LPM tree via the RALTB register. */
static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_vr *vr)
{
	char raltb_pl[MLXSW_REG_RALTB_LEN];

	mlxsw_reg_raltb_pack(raltb_pl, vr->id,
			     (enum mlxsw_reg_ralxx_protocol) vr->proto,
			     vr->lpm_tree->id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}

/* Detach the virtual router from its current LPM tree. */
static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_vr *vr)
{
	char raltb_pl[MLXSW_REG_RALTB_LEN];

	/* Bind to tree 0 which is default */
	mlxsw_reg_raltb_pack(raltb_pl, vr->id,
			     (enum mlxsw_reg_ralxx_protocol) vr->proto, 0);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}
362 
363 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
364 {
365 	/* For our purpose, squash main and local table into one */
366 	if (tb_id == RT_TABLE_LOCAL)
367 		tb_id = RT_TABLE_MAIN;
368 	return tb_id;
369 }
370 
371 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
372 					    u32 tb_id,
373 					    enum mlxsw_sp_l3proto proto)
374 {
375 	struct mlxsw_sp_vr *vr;
376 	int i;
377 
378 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
379 
380 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
381 		vr = &mlxsw_sp->router.vrs[i];
382 		if (vr->used && vr->proto == proto && vr->tb_id == tb_id)
383 			return vr;
384 	}
385 	return NULL;
386 }
387 
/* Set up an unused virtual router for @tb_id: create its FIB, get an
 * LPM tree covering the single requested prefix length (one_reserved
 * is true so a spare tree is kept for later replacement) and bind the
 * router to it. Unwinds in reverse order on failure.
 */
static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
					      unsigned char prefix_len,
					      u32 tb_id,
					      enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_prefix_usage req_prefix_usage;
	struct mlxsw_sp_lpm_tree *lpm_tree;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
	if (!vr)
		return ERR_PTR(-EBUSY);
	vr->fib = mlxsw_sp_fib_create();
	if (IS_ERR(vr->fib))
		return ERR_CAST(vr->fib);

	vr->proto = proto;
	vr->tb_id = tb_id;
	/* Only the prefix length being inserted is required initially. */
	mlxsw_sp_prefix_usage_zero(&req_prefix_usage);
	mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 proto, true);
	if (IS_ERR(lpm_tree)) {
		err = PTR_ERR(lpm_tree);
		goto err_tree_get;
	}
	vr->lpm_tree = lpm_tree;
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
	if (err)
		goto err_tree_bind;

	vr->used = true;
	return vr;

err_tree_bind:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
err_tree_get:
	mlxsw_sp_fib_destroy(vr->fib);

	return ERR_PTR(err);
}
430 
/* Tear down a virtual router: unbind and release its LPM tree, destroy
 * its FIB and mark the slot free for reuse.
 */
static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_vr *vr)
{
	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr);
	mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
	mlxsw_sp_fib_destroy(vr->fib);
	vr->used = false;
}
439 
/* Make sure the virtual router's LPM tree can serve @req_prefix_usage,
 * swapping in a different tree when necessary. Returns 0 when the
 * current tree is adequate or the swap succeeded.
 */
static int
mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
			   struct mlxsw_sp_prefix_usage *req_prefix_usage)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;

	/* Exact match - nothing to do. */
	if (mlxsw_sp_prefix_usage_eq(req_prefix_usage,
				     &vr->lpm_tree->prefix_usage))
		return 0;

	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage,
					 vr->proto, false);
	if (IS_ERR(lpm_tree)) {
		/* We failed to get a tree according to the required
		 * prefix usage. However, the current tree might be still good
		 * for us if our requirement is subset of the prefixes used
		 * in the tree.
		 */
		if (mlxsw_sp_prefix_usage_subset(req_prefix_usage,
						 &vr->lpm_tree->prefix_usage))
			return 0;
		return PTR_ERR(lpm_tree);
	}

	/* Replace the bound tree and drop the reference to the old one. */
	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr);
	mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
	vr->lpm_tree = lpm_tree;
	return mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
}
469 
/* Get the virtual router for @tb_id, creating it when it does not yet
 * exist. For an existing router, extend the LPM tree requirements with
 * @prefix_len, which may trigger a tree replacement.
 */
static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp,
					   unsigned char prefix_len,
					   u32 tb_id,
					   enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_vr *vr;
	int err;

	tb_id = mlxsw_sp_fix_tb_id(tb_id);
	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id, proto);
	if (!vr) {
		vr = mlxsw_sp_vr_create(mlxsw_sp, prefix_len, tb_id, proto);
		if (IS_ERR(vr))
			return vr;
	} else {
		struct mlxsw_sp_prefix_usage req_prefix_usage;

		mlxsw_sp_prefix_usage_cpy(&req_prefix_usage,
					  &vr->fib->prefix_usage);
		mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
		/* Need to replace LPM tree in case new prefix is required. */
		err = mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
						 &req_prefix_usage);
		if (err)
			return ERR_PTR(err);
	}
	return vr;
}
498 
/* Release a virtual router reference obtained via mlxsw_sp_vr_get(). */
static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
{
	/* Destroy virtual router entity in case the associated FIB is empty
	 * and allow it to be used for other tables in future. Otherwise,
	 * check if some prefix usage did not disappear and change tree if
	 * that is the case. Note that in case new, smaller tree cannot be
	 * allocated, the original one will be kept being used.
	 */
	if (mlxsw_sp_prefix_usage_none(&vr->fib->prefix_usage))
		mlxsw_sp_vr_destroy(mlxsw_sp, vr);
	else
		/* Return value intentionally ignored - best effort only. */
		mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
					   &vr->fib->prefix_usage);
}
513 
514 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
515 {
516 	struct mlxsw_sp_vr *vr;
517 	u64 max_vrs;
518 	int i;
519 
520 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
521 		return -EIO;
522 
523 	max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
524 	mlxsw_sp->router.vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
525 				       GFP_KERNEL);
526 	if (!mlxsw_sp->router.vrs)
527 		return -ENOMEM;
528 
529 	for (i = 0; i < max_vrs; i++) {
530 		vr = &mlxsw_sp->router.vrs[i];
531 		vr->id = i;
532 	}
533 
534 	return 0;
535 }
536 
/* Forward declaration; implemented later in this file. */
static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);

/* Release the virtual router array on driver teardown. */
static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
{
	/* At this stage we're guaranteed not to have new incoming
	 * FIB notifications and the work queue is free from FIBs
	 * sitting on top of mlxsw netdevs. However, we can still
	 * have other FIBs queued. Flush the queue before flushing
	 * the device's tables. No need for locks, as we're the only
	 * writer.
	 */
	mlxsw_core_flush_owq();
	mlxsw_sp_router_fib_flush(mlxsw_sp);
	kfree(mlxsw_sp->router.vrs);
}
552 
/* Neighbour entries are keyed solely by the kernel neighbour pointer. */
struct mlxsw_sp_neigh_key {
	struct neighbour *n;
};

/* Driver-side state mirroring one kernel neighbour. */
struct mlxsw_sp_neigh_entry {
	struct list_head rif_list_node;	/* member of the RIF's neigh_list */
	struct rhash_head ht_node;
	struct mlxsw_sp_neigh_key key;
	u16 rif;			/* router interface index */
	bool connected;			/* programmed into the device */
	unsigned char ha[ETH_ALEN];	/* cached hardware address */
	struct list_head nexthop_list; /* list of nexthops using
					* this neigh entry
					*/
	struct list_head nexthop_neighs_list_node;
};

static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
	.key_len = sizeof(struct mlxsw_sp_neigh_key),
};
575 
576 static struct mlxsw_sp_neigh_entry *
577 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
578 			   u16 rif)
579 {
580 	struct mlxsw_sp_neigh_entry *neigh_entry;
581 
582 	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
583 	if (!neigh_entry)
584 		return NULL;
585 
586 	neigh_entry->key.n = n;
587 	neigh_entry->rif = rif;
588 	INIT_LIST_HEAD(&neigh_entry->nexthop_list);
589 
590 	return neigh_entry;
591 }
592 
/* Free a neighbour entry previously allocated by _alloc(). */
static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	kfree(neigh_entry);
}
597 
/* Insert the entry into the router's neighbour hash table. */
static int
mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return rhashtable_insert_fast(&mlxsw_sp->router.neigh_ht,
				      &neigh_entry->ht_node,
				      mlxsw_sp_neigh_ht_params);
}

/* Remove the entry from the router's neighbour hash table. */
static void
mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	rhashtable_remove_fast(&mlxsw_sp->router.neigh_ht,
			       &neigh_entry->ht_node,
			       mlxsw_sp_neigh_ht_params);
}
615 
/* Create a neighbour entry for kernel neighbour @n: resolve the RIF
 * from its netdev, allocate, insert into the hash table and link it on
 * the RIF's neighbour list. Returns an ERR_PTR() on failure.
 */
static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp_rif *r;
	int err;

	/* A neighbour is only of interest on a router interface. */
	r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
	if (!r)
		return ERR_PTR(-EINVAL);

	neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, r->rif);
	if (!neigh_entry)
		return ERR_PTR(-ENOMEM);

	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
	if (err)
		goto err_neigh_entry_insert;

	list_add(&neigh_entry->rif_list_node, &r->neigh_list);

	return neigh_entry;

err_neigh_entry_insert:
	mlxsw_sp_neigh_entry_free(neigh_entry);
	return ERR_PTR(err);
}
643 
/* Undo mlxsw_sp_neigh_entry_create(): unlink, remove from the hash
 * table and free.
 */
static void
mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
			     struct mlxsw_sp_neigh_entry *neigh_entry)
{
	list_del(&neigh_entry->rif_list_node);
	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_free(neigh_entry);
}
652 
653 static struct mlxsw_sp_neigh_entry *
654 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
655 {
656 	struct mlxsw_sp_neigh_key key;
657 
658 	key.n = n;
659 	return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht,
660 				      &key, mlxsw_sp_neigh_ht_params);
661 }
662 
/* Seed the neighbour activity polling interval from the ARP table's
 * default DELAY_PROBE_TIME (stored in milliseconds).
 */
static void
mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
{
	unsigned long interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);

	mlxsw_sp->router.neighs_update.interval = jiffies_to_msecs(interval);
}
670 
/* Handle one IPv4 entry of a RAUHTD activity dump: look up the kernel
 * neighbour behind the reported RIF/DIP pair and mark it active by
 * poking the neighbour state machine.
 */
static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int ent_index)
{
	struct net_device *dev;
	struct neighbour *n;
	__be32 dipn;
	u32 dip;
	u16 rif;

	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);

	if (!mlxsw_sp->rifs[rif]) {
		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
		return;
	}

	/* neigh_lookup() keys on network byte order. */
	dipn = htonl(dip);
	dev = mlxsw_sp->rifs[rif]->dev;
	n = neigh_lookup(&arp_tbl, &dipn, dev);
	if (!n) {
		netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n",
			   &dip);
		return;
	}

	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
	neigh_event_send(n, NULL);
	neigh_release(n);	/* drop the reference taken by neigh_lookup() */
}
701 
702 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
703 						   char *rauhtd_pl,
704 						   int rec_index)
705 {
706 	u8 num_entries;
707 	int i;
708 
709 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
710 								rec_index);
711 	/* Hardware starts counting at 0, so add 1. */
712 	num_entries++;
713 
714 	/* Each record consists of several neighbour entries. */
715 	for (i = 0; i < num_entries; i++) {
716 		int ent_index;
717 
718 		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
719 		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
720 						       ent_index);
721 	}
722 
723 }
724 
/* Dispatch a RAUHTD record by type; IPv6 is not supported here yet,
 * and its appearance is treated as a driver bug.
 */
static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
					      char *rauhtd_pl, int rec_index)
{
	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
	case MLXSW_REG_RAUHTD_TYPE_IPV4:
		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
						       rec_index);
		break;
	case MLXSW_REG_RAUHTD_TYPE_IPV6:
		WARN_ON_ONCE(1);
		break;
	}
}
738 
/* Decide whether a RAUHTD response may have further records pending,
 * i.e. whether the dump loop should query again. The response is
 * considered full when the maximum number of records was returned and
 * the last record is itself completely filled.
 */
static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
{
	u8 num_rec, last_rec_index, num_entries;

	num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
	last_rec_index = num_rec - 1;

	if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
		return false;
	/* An IPv6 record's fill level cannot be inspected here, so a
	 * trailing IPv6 record is conservatively treated as full.
	 */
	if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
	    MLXSW_REG_RAUHTD_TYPE_IPV6)
		return true;

	/* Entry count is zero-based, hence the pre-increment. */
	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
								last_rec_index);
	if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
		return true;
	return false;
}
758 
759 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
760 {
761 	char *rauhtd_pl;
762 	u8 num_rec;
763 	int i, err;
764 
765 	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
766 	if (!rauhtd_pl)
767 		return -ENOMEM;
768 
769 	/* Make sure the neighbour's netdev isn't removed in the
770 	 * process.
771 	 */
772 	rtnl_lock();
773 	do {
774 		mlxsw_reg_rauhtd_pack(rauhtd_pl, MLXSW_REG_RAUHTD_TYPE_IPV4);
775 		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
776 				      rauhtd_pl);
777 		if (err) {
778 			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour talbe\n");
779 			break;
780 		}
781 		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
782 		for (i = 0; i < num_rec; i++)
783 			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
784 							  i);
785 	} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
786 	rtnl_unlock();
787 
788 	kfree(rauhtd_pl);
789 	return err;
790 }
791 
/* Keep neighbours that back nexthops alive in the kernel, independent
 * of CPU-visible traffic.
 */
static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;

	/* Take RTNL mutex here to prevent lists from changes */
	rtnl_lock();
	list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
			    nexthop_neighs_list_node)
		/* If this neigh have nexthops, make the kernel think this neigh
		 * is active regardless of the traffic.
		 */
		neigh_event_send(neigh_entry->key.n, NULL);
	rtnl_unlock();
}
806 
/* Re-arm the neighbour activity polling work with the currently
 * configured interval (milliseconds, converted to jiffies).
 */
static void
mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
{
	unsigned long interval = mlxsw_sp->router.neighs_update.interval;

	mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw,
			       msecs_to_jiffies(interval));
}
815 
816 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
817 {
818 	struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
819 						 router.neighs_update.dw.work);
820 	int err;
821 
822 	err = mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp);
823 	if (err)
824 		dev_err(mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
825 
826 	mlxsw_sp_router_neighs_update_nh(mlxsw_sp);
827 
828 	mlxsw_sp_router_neighs_update_work_schedule(mlxsw_sp);
829 }
830 
/* Periodic work: trigger resolution of nexthop neighbours that are not
 * yet connected, then reschedule itself.
 */
static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
						 router.nexthop_probe_dw.work);

	/* Iterate over nexthop neighbours, find those who are unresolved and
	 * send arp on them. This solves the chicken-egg problem when
	 * the nexthop wouldn't get offloaded until the neighbor is resolved
	 * but it wouldn't get resolved ever in case traffic is flowing in HW
	 * using different nexthop.
	 *
	 * Take RTNL mutex here to prevent lists from changes.
	 */
	rtnl_lock();
	list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
			    nexthop_neighs_list_node)
		if (!neigh_entry->connected)
			neigh_event_send(neigh_entry->key.n, NULL);
	rtnl_unlock();

	mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw,
			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
}
855 
/* Forward declaration; implementation appears later in the file. */
static void
mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
			      struct mlxsw_sp_neigh_entry *neigh_entry,
			      bool removing);

/* Map add/delete intent to the corresponding RAUHT write operation. */
static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
{
	return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
			MLXSW_REG_RAUHT_OP_WRITE_DELETE;
}
866 
/* Write (add or delete, per @op) an IPv4 neighbour entry to the device
 * via the RAUHT register. The write's return value is ignored here;
 * callers have no error path for it.
 */
static void
mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_neigh_entry *neigh_entry,
				enum mlxsw_reg_rauht_op op)
{
	struct neighbour *n = neigh_entry->key.n;
	u32 dip = ntohl(*((__be32 *) n->primary_key));
	char rauht_pl[MLXSW_REG_RAUHT_LEN];

	mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
			      dip);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
}
880 
/* Reflect a neighbour's connected state in the device: program the
 * entry when @adding, remove it otherwise. Removing an entry that was
 * never programmed is a no-op. Only ARP (IPv4) is supported.
 */
static void
mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry,
			    bool adding)
{
	if (!adding && !neigh_entry->connected)
		return;
	neigh_entry->connected = adding;
	if (neigh_entry->key.n->tbl == &arp_tbl)
		mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
						mlxsw_sp_rauht_op(adding));
	else
		WARN_ON_ONCE(1);
}
895 
/* Deferred context for a NETEVENT_NEIGH_UPDATE notification; 'n' holds
 * a reference dropped by the work function.
 */
struct mlxsw_sp_neigh_event_work {
	struct work_struct work;
	struct mlxsw_sp *mlxsw_sp;
	struct neighbour *n;
};

/* Process a neighbour update in process context: snapshot the
 * neighbour's state under its lock, then create/update/destroy the
 * driver entry and the device state under RTNL.
 */
static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
{
	struct mlxsw_sp_neigh_event_work *neigh_work =
		container_of(work, struct mlxsw_sp_neigh_event_work, work);
	struct mlxsw_sp *mlxsw_sp = neigh_work->mlxsw_sp;
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct neighbour *n = neigh_work->n;
	unsigned char ha[ETH_ALEN];
	bool entry_connected;
	u8 nud_state, dead;

	/* If these parameters are changed after we release the lock,
	 * then we are guaranteed to receive another event letting us
	 * know about it.
	 */
	read_lock_bh(&n->lock);
	memcpy(ha, n->ha, ETH_ALEN);
	nud_state = n->nud_state;
	dead = n->dead;
	read_unlock_bh(&n->lock);

	rtnl_lock();
	entry_connected = nud_state & NUD_VALID && !dead;
	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
	/* Nothing tracked and nothing to program - done. */
	if (!entry_connected && !neigh_entry)
		goto out;
	if (!neigh_entry) {
		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
		if (IS_ERR(neigh_entry))
			goto out;
	}

	memcpy(neigh_entry->ha, ha, ETH_ALEN);
	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
	mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);

	/* Disconnected and unused by any nexthop - drop the entry. */
	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);

out:
	rtnl_unlock();
	neigh_release(n);	/* paired with neigh_clone() at enqueue */
	kfree(neigh_work);
}
946 
/* Netevent notifier callback. Runs in atomic context, so real work for
 * neighbour updates is deferred to a workqueue; interval updates are
 * applied directly. Only the IPv4 ARP table is handled.
 */
int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
				   unsigned long event, void *ptr)
{
	struct mlxsw_sp_neigh_event_work *neigh_work;
	struct mlxsw_sp_port *mlxsw_sp_port;
	struct mlxsw_sp *mlxsw_sp;
	unsigned long interval;
	struct neigh_parms *p;
	struct neighbour *n;

	switch (event) {
	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
		p = ptr;

		/* We don't care about changes in the default table. */
		if (!p->dev || p->tbl != &arp_tbl)
			return NOTIFY_DONE;

		/* We are in atomic context and can't take RTNL mutex,
		 * so use RCU variant to walk the device chain.
		 */
		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
		if (!mlxsw_sp_port)
			return NOTIFY_DONE;

		/* Track the kernel's probe delay as our polling interval. */
		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
		mlxsw_sp->router.neighs_update.interval = interval;

		mlxsw_sp_port_dev_put(mlxsw_sp_port);
		break;
	case NETEVENT_NEIGH_UPDATE:
		n = ptr;

		if (n->tbl != &arp_tbl)
			return NOTIFY_DONE;

		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
		if (!mlxsw_sp_port)
			return NOTIFY_DONE;

		neigh_work = kzalloc(sizeof(*neigh_work), GFP_ATOMIC);
		if (!neigh_work) {
			mlxsw_sp_port_dev_put(mlxsw_sp_port);
			return NOTIFY_BAD;
		}

		INIT_WORK(&neigh_work->work, mlxsw_sp_router_neigh_event_work);
		neigh_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
		neigh_work->n = n;

		/* Take a reference to ensure the neighbour won't be
		 * destructed until we drop the reference in delayed
		 * work.
		 */
		neigh_clone(n);
		mlxsw_core_schedule_work(&neigh_work->work);
		mlxsw_sp_port_dev_put(mlxsw_sp_port);
		break;
	}

	return NOTIFY_DONE;
}
1010 
/* Initialize neighbour tracking: the hash table, the polling interval
 * and the two periodic works (activity update, unresolved-nexthop
 * probing), both kicked off immediately.
 */
static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
{
	int err;

	err = rhashtable_init(&mlxsw_sp->router.neigh_ht,
			      &mlxsw_sp_neigh_ht_params);
	if (err)
		return err;

	/* Initialize the polling interval according to the default
	 * table.
	 */
	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);

	/* Create the delayed works for the activity_update */
	INIT_DELAYED_WORK(&mlxsw_sp->router.neighs_update.dw,
			  mlxsw_sp_router_neighs_update_work);
	INIT_DELAYED_WORK(&mlxsw_sp->router.nexthop_probe_dw,
			  mlxsw_sp_router_probe_unresolved_nexthops);
	mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, 0);
	mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, 0);
	return 0;
}
1034 
/* Tear down neighbour tracking. Both delayed works are cancelled
 * synchronously before the hash table they operate on is destroyed.
 */
static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
{
	cancel_delayed_work_sync(&mlxsw_sp->router.neighs_update.dw);
	cancel_delayed_work_sync(&mlxsw_sp->router.nexthop_probe_dw);
	rhashtable_destroy(&mlxsw_sp->router.neigh_ht);
}
1041 
/* Ask the device (via RAUHT delete-all) to flush every neighbour entry
 * associated with the given router interface.
 */
static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp,
				    const struct mlxsw_sp_rif *r)
{
	char rauht_pl[MLXSW_REG_RAUHT_LEN];

	mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL,
			     r->rif, r->addr);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
}
1051 
/* Called when a RIF goes away: flush its neighbours from the device and
 * destroy the corresponding driver neigh entries.
 */
static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_rif *r)
{
	struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;

	/* The _safe iterator is required because destroy unlinks the entry
	 * from r->neigh_list.
	 */
	mlxsw_sp_neigh_rif_flush(mlxsw_sp, r);
	list_for_each_entry_safe(neigh_entry, tmp, &r->neigh_list,
				 rif_list_node)
		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
}
1062 
/* Hash table key for a nexthop: the kernel FIB nexthop it mirrors. */
struct mlxsw_sp_nexthop_key {
	struct fib_nh *fib_nh;
};
1066 
/* Driver representation of a single nexthop. Tracks the RIF and neigh
 * entry it resolves through and its offload state within the owning
 * group's adjacency block.
 */
struct mlxsw_sp_nexthop {
	struct list_head neigh_list_node; /* member of neigh entry list */
	struct list_head rif_list_node;
	struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
						* this belongs to
						*/
	struct rhash_head ht_node; /* member of router.nexthop_ht */
	struct mlxsw_sp_nexthop_key key;
	struct mlxsw_sp_rif *r; /* egress RIF; NULL while unbound */
	u8 should_offload:1, /* set indicates this neigh is connected and
			      * should be put to KVD linear area of this group.
			      */
	   offloaded:1, /* set in case the neigh is actually put into
			 * KVD linear area of this group.
			 */
	   update:1; /* set indicates that MAC of this neigh should be
		      * updated in HW
		      */
	struct mlxsw_sp_neigh_entry *neigh_entry; /* NULL until resolved */
};
1087 
/* Hash table key for a nexthop group: the kernel fib_info it mirrors. */
struct mlxsw_sp_nexthop_group_key {
	struct fib_info *fi;
};
1091 
1092 struct mlxsw_sp_nexthop_group {
1093 	struct rhash_head ht_node;
1094 	struct list_head fib_list; /* list of fib entries that use this group */
1095 	struct mlxsw_sp_nexthop_group_key key;
1096 	u8 adj_index_valid:1,
1097 	   gateway:1; /* routes using the group use a gateway */
1098 	u32 adj_index;
1099 	u16 ecmp_size;
1100 	u16 count;
1101 	struct mlxsw_sp_nexthop nexthops[0];
1102 #define nh_rif	nexthops[0].r
1103 };
1104 
/* Nexthop groups are hashed by their fib_info pointer. */
static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_nexthop_group, key),
	.head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
	.key_len = sizeof(struct mlxsw_sp_nexthop_group_key),
};
1110 
1111 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
1112 					 struct mlxsw_sp_nexthop_group *nh_grp)
1113 {
1114 	return rhashtable_insert_fast(&mlxsw_sp->router.nexthop_group_ht,
1115 				      &nh_grp->ht_node,
1116 				      mlxsw_sp_nexthop_group_ht_params);
1117 }
1118 
1119 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
1120 					  struct mlxsw_sp_nexthop_group *nh_grp)
1121 {
1122 	rhashtable_remove_fast(&mlxsw_sp->router.nexthop_group_ht,
1123 			       &nh_grp->ht_node,
1124 			       mlxsw_sp_nexthop_group_ht_params);
1125 }
1126 
1127 static struct mlxsw_sp_nexthop_group *
1128 mlxsw_sp_nexthop_group_lookup(struct mlxsw_sp *mlxsw_sp,
1129 			      struct mlxsw_sp_nexthop_group_key key)
1130 {
1131 	return rhashtable_lookup_fast(&mlxsw_sp->router.nexthop_group_ht, &key,
1132 				      mlxsw_sp_nexthop_group_ht_params);
1133 }
1134 
/* Individual nexthops are hashed by their kernel fib_nh pointer. */
static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_nexthop, key),
	.head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
	.key_len = sizeof(struct mlxsw_sp_nexthop_key),
};
1140 
1141 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
1142 				   struct mlxsw_sp_nexthop *nh)
1143 {
1144 	return rhashtable_insert_fast(&mlxsw_sp->router.nexthop_ht,
1145 				      &nh->ht_node, mlxsw_sp_nexthop_ht_params);
1146 }
1147 
1148 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
1149 				    struct mlxsw_sp_nexthop *nh)
1150 {
1151 	rhashtable_remove_fast(&mlxsw_sp->router.nexthop_ht, &nh->ht_node,
1152 			       mlxsw_sp_nexthop_ht_params);
1153 }
1154 
1155 static struct mlxsw_sp_nexthop *
1156 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
1157 			struct mlxsw_sp_nexthop_key key)
1158 {
1159 	return rhashtable_lookup_fast(&mlxsw_sp->router.nexthop_ht, &key,
1160 				      mlxsw_sp_nexthop_ht_params);
1161 }
1162 
1163 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
1164 					     struct mlxsw_sp_vr *vr,
1165 					     u32 adj_index, u16 ecmp_size,
1166 					     u32 new_adj_index,
1167 					     u16 new_ecmp_size)
1168 {
1169 	char raleu_pl[MLXSW_REG_RALEU_LEN];
1170 
1171 	mlxsw_reg_raleu_pack(raleu_pl,
1172 			     (enum mlxsw_reg_ralxx_protocol) vr->proto, vr->id,
1173 			     adj_index, ecmp_size, new_adj_index,
1174 			     new_ecmp_size);
1175 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
1176 }
1177 
1178 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
1179 					  struct mlxsw_sp_nexthop_group *nh_grp,
1180 					  u32 old_adj_index, u16 old_ecmp_size)
1181 {
1182 	struct mlxsw_sp_fib_entry *fib_entry;
1183 	struct mlxsw_sp_vr *vr = NULL;
1184 	int err;
1185 
1186 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
1187 		if (vr == fib_entry->fib_node->vr)
1188 			continue;
1189 		vr = fib_entry->fib_node->vr;
1190 		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr,
1191 							old_adj_index,
1192 							old_ecmp_size,
1193 							nh_grp->adj_index,
1194 							nh_grp->ecmp_size);
1195 		if (err)
1196 			return err;
1197 	}
1198 	return 0;
1199 }
1200 
/* Write one adjacency entry (RATR): the neighbour's MAC and egress RIF
 * at the given adjacency index.
 */
static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
				       struct mlxsw_sp_nexthop *nh)
{
	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
	char ratr_pl[MLXSW_REG_RATR_LEN];

	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
			    true, adj_index, neigh_entry->rif);
	mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
}
1212 
1213 static int
1214 mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp,
1215 				  struct mlxsw_sp_nexthop_group *nh_grp,
1216 				  bool reallocate)
1217 {
1218 	u32 adj_index = nh_grp->adj_index; /* base */
1219 	struct mlxsw_sp_nexthop *nh;
1220 	int i;
1221 	int err;
1222 
1223 	for (i = 0; i < nh_grp->count; i++) {
1224 		nh = &nh_grp->nexthops[i];
1225 
1226 		if (!nh->should_offload) {
1227 			nh->offloaded = 0;
1228 			continue;
1229 		}
1230 
1231 		if (nh->update || reallocate) {
1232 			err = mlxsw_sp_nexthop_mac_update(mlxsw_sp,
1233 							  adj_index, nh);
1234 			if (err)
1235 				return err;
1236 			nh->update = 0;
1237 			nh->offloaded = 1;
1238 		}
1239 		adj_index++;
1240 	}
1241 	return 0;
1242 }
1243 
1244 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1245 				     struct mlxsw_sp_fib_entry *fib_entry);
1246 
1247 static int
1248 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
1249 				    struct mlxsw_sp_nexthop_group *nh_grp)
1250 {
1251 	struct mlxsw_sp_fib_entry *fib_entry;
1252 	int err;
1253 
1254 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
1255 		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1256 		if (err)
1257 			return err;
1258 	}
1259 	return 0;
1260 }
1261 
/* Re-resolve the group into the device's adjacency (KVD linear) table and
 * update all FIB entries that use it. On any failure the group falls back
 * to trapping its routes to the kernel (set_trap).
 */
static void
mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop *nh;
	bool offload_change = false;
	u32 adj_index;
	u16 ecmp_size = 0;
	bool old_adj_index_valid;
	u32 old_adj_index;
	u16 old_ecmp_size;
	int ret;
	int i;
	int err;

	if (!nh_grp->gateway) {
		/* Gateway-less groups use no adjacency entries; just
		 * rewrite their FIB entries.
		 */
		mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
		return;
	}

	/* Count offloadable nexthops (the required adjacency block size)
	 * and detect membership changes since the last refresh.
	 */
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];

		if (nh->should_offload ^ nh->offloaded) {
			offload_change = true;
			if (nh->should_offload)
				nh->update = 1;
		}
		if (nh->should_offload)
			ecmp_size++;
	}
	if (!offload_change) {
		/* Nothing was added or removed, so no need to reallocate. Just
		 * update MAC on existing adjacency indexes.
		 */
		err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp,
							false);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
			goto set_trap;
		}
		return;
	}
	if (!ecmp_size)
		/* No neigh of this group is connected so we just set
		 * the trap and let everything flow through kernel.
		 */
		goto set_trap;

	/* Membership changed: allocate a new adjacency block of the new
	 * size (mlxsw_sp_kvdl_alloc returns the base index or -errno).
	 */
	ret = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size);
	if (ret < 0) {
		/* We ran out of KVD linear space, just set the
		 * trap and let everything flow through kernel.
		 */
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
		goto set_trap;
	}
	adj_index = ret;
	old_adj_index_valid = nh_grp->adj_index_valid;
	old_adj_index = nh_grp->adj_index;
	old_ecmp_size = nh_grp->ecmp_size;
	nh_grp->adj_index_valid = 1;
	nh_grp->adj_index = adj_index;
	nh_grp->ecmp_size = ecmp_size;
	/* Populate the new block before any route points at it. */
	err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp, true);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
		goto set_trap;
	}

	if (!old_adj_index_valid) {
		/* The trap was set for fib entries, so we have to call
		 * fib entry update to unset it and use adjacency index.
		 */
		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
			goto set_trap;
		}
		return;
	}

	/* Atomically re-point routes from the old block to the new one,
	 * then release the old block.
	 */
	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
					     old_adj_index, old_ecmp_size);
	mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
		goto set_trap;
	}
	return;

set_trap:
	/* Fallback: mark the whole group as not offloaded, trap its routes
	 * to the kernel and release the adjacency block if one was held.
	 */
	old_adj_index_valid = nh_grp->adj_index_valid;
	nh_grp->adj_index_valid = 0;
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		nh->offloaded = 0;
	}
	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
	if (err)
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
	if (old_adj_index_valid)
		mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
}
1366 
1367 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
1368 					    bool removing)
1369 {
1370 	if (!removing && !nh->should_offload)
1371 		nh->should_offload = 1;
1372 	else if (removing && nh->offloaded)
1373 		nh->should_offload = 0;
1374 	nh->update = 1;
1375 }
1376 
1377 static void
1378 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
1379 			      struct mlxsw_sp_neigh_entry *neigh_entry,
1380 			      bool removing)
1381 {
1382 	struct mlxsw_sp_nexthop *nh;
1383 
1384 	list_for_each_entry(nh, &neigh_entry->nexthop_list,
1385 			    neigh_list_node) {
1386 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
1387 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
1388 	}
1389 }
1390 
1391 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
1392 				      struct mlxsw_sp_rif *r)
1393 {
1394 	if (nh->r)
1395 		return;
1396 
1397 	nh->r = r;
1398 	list_add(&nh->rif_list_node, &r->nexthop_list);
1399 }
1400 
1401 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
1402 {
1403 	if (!nh->r)
1404 		return;
1405 
1406 	list_del(&nh->rif_list_node);
1407 	nh->r = NULL;
1408 }
1409 
1410 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
1411 				       struct mlxsw_sp_nexthop *nh)
1412 {
1413 	struct mlxsw_sp_neigh_entry *neigh_entry;
1414 	struct fib_nh *fib_nh = nh->key.fib_nh;
1415 	struct neighbour *n;
1416 	u8 nud_state, dead;
1417 	int err;
1418 
1419 	if (!nh->nh_grp->gateway || nh->neigh_entry)
1420 		return 0;
1421 
1422 	/* Take a reference of neigh here ensuring that neigh would
1423 	 * not be detructed before the nexthop entry is finished.
1424 	 * The reference is taken either in neigh_lookup() or
1425 	 * in neigh_create() in case n is not found.
1426 	 */
1427 	n = neigh_lookup(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev);
1428 	if (!n) {
1429 		n = neigh_create(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev);
1430 		if (IS_ERR(n))
1431 			return PTR_ERR(n);
1432 		neigh_event_send(n, NULL);
1433 	}
1434 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
1435 	if (!neigh_entry) {
1436 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
1437 		if (IS_ERR(neigh_entry)) {
1438 			err = -EINVAL;
1439 			goto err_neigh_entry_create;
1440 		}
1441 	}
1442 
1443 	/* If that is the first nexthop connected to that neigh, add to
1444 	 * nexthop_neighs_list
1445 	 */
1446 	if (list_empty(&neigh_entry->nexthop_list))
1447 		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
1448 			      &mlxsw_sp->router.nexthop_neighs_list);
1449 
1450 	nh->neigh_entry = neigh_entry;
1451 	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
1452 	read_lock_bh(&n->lock);
1453 	nud_state = n->nud_state;
1454 	dead = n->dead;
1455 	read_unlock_bh(&n->lock);
1456 	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
1457 
1458 	return 0;
1459 
1460 err_neigh_entry_create:
1461 	neigh_release(n);
1462 	return err;
1463 }
1464 
/* Detach a nexthop from its neighbour entry, destroying the entry when it
 * is disconnected and unused, and dropping the neighbour reference taken
 * in mlxsw_sp_nexthop_neigh_init(). A no-op if the nexthop is unresolved.
 */
static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
	struct neighbour *n;

	if (!neigh_entry)
		return;
	n = neigh_entry->key.n;

	__mlxsw_sp_nexthop_neigh_update(nh, true);
	list_del(&nh->neigh_list_node);
	nh->neigh_entry = NULL;

	/* If that is the last nexthop connected to that neigh, remove from
	 * nexthop_neighs_list
	 */
	if (list_empty(&neigh_entry->nexthop_list))
		list_del(&neigh_entry->nexthop_neighs_list_node);

	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);

	neigh_release(n);
}
1490 
/* Initialize one nexthop of a group: index it by its fib_nh, then try to
 * bind it to a RIF and resolve its neighbour. The early `return 0` paths
 * deliberately leave the nexthop unresolved (not offloadable), so its
 * routes are trapped to the kernel. Returns 0 or a negative errno.
 */
static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_nexthop_group *nh_grp,
				 struct mlxsw_sp_nexthop *nh,
				 struct fib_nh *fib_nh)
{
	struct net_device *dev = fib_nh->nh_dev;
	struct in_device *in_dev;
	struct mlxsw_sp_rif *r;
	int err;

	nh->nh_grp = nh_grp;
	nh->key.fib_nh = fib_nh;
	err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
	if (err)
		return err;

	/* Respect ignore_routes_with_linkdown: don't resolve a nexthop
	 * whose link is down.
	 */
	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
	    fib_nh->nh_flags & RTNH_F_LINKDOWN)
		return 0;

	r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!r)
		return 0;
	mlxsw_sp_nexthop_rif_init(nh, r);

	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
	if (err)
		goto err_nexthop_neigh_init;

	return 0;

err_nexthop_neigh_init:
	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
	return err;
}
1527 
/* Tear down a nexthop in reverse order of mlxsw_sp_nexthop_init():
 * neighbour first, then RIF binding, then the hash table index.
 */
static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
	mlxsw_sp_nexthop_rif_fini(nh);
	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
}
1535 
/* Handle a FIB nexthop add/del notification: (un)bind the matching driver
 * nexthop and refresh its group.
 */
static void mlxsw_sp_nexthop_event(struct mlxsw_sp *mlxsw_sp,
				   unsigned long event, struct fib_nh *fib_nh)
{
	struct mlxsw_sp_nexthop_key key;
	struct mlxsw_sp_nexthop *nh;
	struct mlxsw_sp_rif *r;

	if (mlxsw_sp->router.aborted)
		return;

	key.fib_nh = fib_nh;
	nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
	if (WARN_ON_ONCE(!nh))
		return;

	/* NOTE(review): a missing RIF aborts handling of both events here;
	 * for NH_DEL the RIF-gone path presumably cleans up instead —
	 * confirm against mlxsw_sp_nexthop_rif_gone_sync().
	 */
	r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, fib_nh->nh_dev);
	if (!r)
		return;

	switch (event) {
	case FIB_EVENT_NH_ADD:
		mlxsw_sp_nexthop_rif_init(nh, r);
		mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
		break;
	case FIB_EVENT_NH_DEL:
		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
		mlxsw_sp_nexthop_rif_fini(nh);
		break;
	}

	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
}
1568 
/* Called when a RIF goes away: unresolve and unbind every nexthop using
 * it and refresh the affected groups (their routes fall back to trap).
 */
static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
					   struct mlxsw_sp_rif *r)
{
	struct mlxsw_sp_nexthop *nh, *tmp;

	/* _safe iteration: rif_fini unlinks nh from r->nexthop_list. */
	list_for_each_entry_safe(nh, tmp, &r->nexthop_list, rif_list_node) {
		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
		mlxsw_sp_nexthop_rif_fini(nh);
		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
	}
}
1580 
/* Create a nexthop group mirroring a kernel fib_info: allocate the group
 * with one embedded nexthop per fib_nh, initialize and index each, index
 * the group itself and program it into the device. Returns the group or
 * an ERR_PTR().
 */
static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
{
	struct mlxsw_sp_nexthop_group *nh_grp;
	struct mlxsw_sp_nexthop *nh;
	struct fib_nh *fib_nh;
	size_t alloc_size;
	int i;
	int err;

	alloc_size = sizeof(*nh_grp) +
		     fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
	if (!nh_grp)
		return ERR_PTR(-ENOMEM);
	INIT_LIST_HEAD(&nh_grp->fib_list);
	/* Gateway flag is derived from the first nexthop's scope only;
	 * assumes all nexthops of a fib_info share it — TODO confirm.
	 */
	nh_grp->gateway = fi->fib_nh->nh_scope == RT_SCOPE_LINK;
	nh_grp->count = fi->fib_nhs;
	nh_grp->key.fi = fi;
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		fib_nh = &fi->fib_nh[i];
		err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh);
		if (err)
			goto err_nexthop_init;
	}
	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
	if (err)
		goto err_nexthop_group_insert;
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	return nh_grp;

err_nexthop_group_insert:
err_nexthop_init:
	/* Unwind only the nexthops initialized so far (i == count when the
	 * group insert itself failed, so all of them).
	 */
	for (i--; i >= 0; i--) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
	}
	kfree(nh_grp);
	return ERR_PTR(err);
}
1622 
/* Destroy a nexthop group: unindex it, tear down its nexthops and run a
 * final refresh, which releases the group's adjacency block (hence the
 * WARN if an adjacency index is still held afterwards).
 */
static void
mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop *nh;
	int i;

	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
	}
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	WARN_ON_ONCE(nh_grp->adj_index_valid);
	kfree(nh_grp);
}
1639 
1640 static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp,
1641 				      struct mlxsw_sp_fib_entry *fib_entry,
1642 				      struct fib_info *fi)
1643 {
1644 	struct mlxsw_sp_nexthop_group_key key;
1645 	struct mlxsw_sp_nexthop_group *nh_grp;
1646 
1647 	key.fi = fi;
1648 	nh_grp = mlxsw_sp_nexthop_group_lookup(mlxsw_sp, key);
1649 	if (!nh_grp) {
1650 		nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi);
1651 		if (IS_ERR(nh_grp))
1652 			return PTR_ERR(nh_grp);
1653 	}
1654 	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
1655 	fib_entry->nh_group = nh_grp;
1656 	return 0;
1657 }
1658 
1659 static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp,
1660 				       struct mlxsw_sp_fib_entry *fib_entry)
1661 {
1662 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
1663 
1664 	list_del(&fib_entry->nexthop_group_node);
1665 	if (!list_empty(&nh_grp->fib_list))
1666 		return;
1667 	mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp);
1668 }
1669 
1670 static bool
1671 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
1672 {
1673 	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
1674 
1675 	if (fib_entry->params.tos)
1676 		return false;
1677 
1678 	switch (fib_entry->type) {
1679 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
1680 		return !!nh_group->adj_index_valid;
1681 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
1682 		return !!nh_group->nh_rif;
1683 	default:
1684 		return false;
1685 	}
1686 }
1687 
/* Mark a FIB entry as offloaded and reflect it to the kernel FIB via the
 * fib_info offload counter (IPv4 only; IPv6 is unexpected here).
 */
static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
{
	fib_entry->offloaded = true;

	switch (fib_entry->fib_node->vr->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		fib_info_offload_inc(fib_entry->nh_group->key.fi);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		WARN_ON_ONCE(1);
	}
}
1700 
/* Clear a FIB entry's offloaded mark, mirroring
 * mlxsw_sp_fib_entry_offload_set().
 */
static void
mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
{
	switch (fib_entry->fib_node->vr->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		fib_info_offload_dec(fib_entry->nh_group->key.fi);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		WARN_ON_ONCE(1);
	}

	fib_entry->offloaded = false;
}
1714 
1715 static void
1716 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
1717 				   enum mlxsw_reg_ralue_op op, int err)
1718 {
1719 	switch (op) {
1720 	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
1721 		if (!fib_entry->offloaded)
1722 			return;
1723 		return mlxsw_sp_fib_entry_offload_unset(fib_entry);
1724 	case MLXSW_REG_RALUE_OP_WRITE_WRITE:
1725 		if (err)
1726 			return;
1727 		if (mlxsw_sp_fib_entry_should_offload(fib_entry) &&
1728 		    !fib_entry->offloaded)
1729 			mlxsw_sp_fib_entry_offload_set(fib_entry);
1730 		else if (!mlxsw_sp_fib_entry_should_offload(fib_entry) &&
1731 			 fib_entry->offloaded)
1732 			mlxsw_sp_fib_entry_offload_unset(fib_entry);
1733 		return;
1734 	default:
1735 		return;
1736 	}
1737 }
1738 
/* Program an IPv4 remote (gateway) route: point it at the group's
 * adjacency block when offloadable, otherwise trap it to the kernel.
 */
static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib_entry *fib_entry,
					 enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
	struct mlxsw_sp_vr *vr = fib_entry->fib_node->vr;
	enum mlxsw_reg_ralue_trap_action trap_action;
	u16 trap_id = 0;
	u32 adjacency_index = 0;
	u16 ecmp_size = 0;

	/* In case the nexthop group adjacency index is valid, use it
	 * with provided ECMP size. Otherwise, setup trap and pass
	 * traffic to kernel.
	 */
	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
		adjacency_index = fib_entry->nh_group->adj_index;
		ecmp_size = fib_entry->nh_group->ecmp_size;
	} else {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
	}

	mlxsw_reg_ralue_pack4(ralue_pl,
			      (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
			      vr->id, fib_entry->fib_node->key.prefix_len,
			      *p_dip);
	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
					adjacency_index, ecmp_size);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
1772 
/* Program an IPv4 directly-connected route: forward out of the group's
 * RIF when offloadable, otherwise trap it to the kernel.
 */
static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry,
					enum mlxsw_reg_ralue_op op)
{
	struct mlxsw_sp_rif *r = fib_entry->nh_group->nh_rif;
	enum mlxsw_reg_ralue_trap_action trap_action;
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
	struct mlxsw_sp_vr *vr = fib_entry->fib_node->vr;
	u16 trap_id = 0;
	u16 rif = 0;

	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
		rif = r->rif;
	} else {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
	}

	mlxsw_reg_ralue_pack4(ralue_pl,
			      (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
			      vr->id, fib_entry->fib_node->key.prefix_len,
			      *p_dip);
	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, rif);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
1800 
/* Program an IPv4 route that always goes to the CPU (local/broadcast)
 * using the ip2me action.
 */
static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry *fib_entry,
				       enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
	struct mlxsw_sp_vr *vr = fib_entry->fib_node->vr;

	mlxsw_reg_ralue_pack4(ralue_pl,
			      (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
			      vr->id, fib_entry->fib_node->key.prefix_len,
			      *p_dip);
	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
1816 
1817 static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp,
1818 				  struct mlxsw_sp_fib_entry *fib_entry,
1819 				  enum mlxsw_reg_ralue_op op)
1820 {
1821 	switch (fib_entry->type) {
1822 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
1823 		return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op);
1824 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
1825 		return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op);
1826 	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
1827 		return mlxsw_sp_fib_entry_op4_trap(mlxsw_sp, fib_entry, op);
1828 	}
1829 	return -EINVAL;
1830 }
1831 
1832 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
1833 				 struct mlxsw_sp_fib_entry *fib_entry,
1834 				 enum mlxsw_reg_ralue_op op)
1835 {
1836 	int err = -EINVAL;
1837 
1838 	switch (fib_entry->fib_node->vr->proto) {
1839 	case MLXSW_SP_L3_PROTO_IPV4:
1840 		err = mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op);
1841 		break;
1842 	case MLXSW_SP_L3_PROTO_IPV6:
1843 		return err;
1844 	}
1845 	mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
1846 	return err;
1847 }
1848 
/* Write (create or overwrite) a FIB entry in the device. */
static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_entry *fib_entry)
{
	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
}
1855 
/* Delete a FIB entry from the device. */
static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_entry *fib_entry)
{
	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
}
1862 
1863 static int
1864 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
1865 			     const struct fib_entry_notifier_info *fen_info,
1866 			     struct mlxsw_sp_fib_entry *fib_entry)
1867 {
1868 	struct fib_info *fi = fen_info->fi;
1869 
1870 	if (fen_info->type == RTN_LOCAL || fen_info->type == RTN_BROADCAST) {
1871 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1872 		return 0;
1873 	}
1874 	if (fen_info->type != RTN_UNICAST)
1875 		return -EINVAL;
1876 	if (fi->fib_nh->nh_scope != RT_SCOPE_LINK)
1877 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
1878 	else
1879 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
1880 	return 0;
1881 }
1882 
/* Allocate a FIB entry for an IPv4 route notification, classify it,
 * attach it to its nexthop group and record the route parameters used
 * for later lookup. Returns the entry or an ERR_PTR().
 */
static struct mlxsw_sp_fib_entry *
mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
			   struct mlxsw_sp_fib_node *fib_node,
			   const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	int err;

	fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL);
	if (!fib_entry) {
		err = -ENOMEM;
		goto err_fib_entry_alloc;
	}

	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
	if (err)
		goto err_fib4_entry_type_set;

	err = mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fen_info->fi);
	if (err)
		goto err_nexthop_group_get;

	fib_entry->params.prio = fen_info->fi->fib_priority;
	fib_entry->params.tb_id = fen_info->tb_id;
	fib_entry->params.type = fen_info->type;
	fib_entry->params.tos = fen_info->tos;

	fib_entry->fib_node = fib_node;

	return fib_entry;

err_nexthop_group_get:
err_fib4_entry_type_set:
	kfree(fib_entry);
err_fib_entry_alloc:
	return ERR_PTR(err);
}
1920 
/* Release a FIB entry: drop its nexthop group reference and free it. */
static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry)
{
	mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry);
	kfree(fib_entry);
}
1927 
1928 static struct mlxsw_sp_fib_node *
1929 mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp,
1930 		       const struct fib_entry_notifier_info *fen_info);
1931 
1932 static struct mlxsw_sp_fib_entry *
1933 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
1934 			   const struct fib_entry_notifier_info *fen_info)
1935 {
1936 	struct mlxsw_sp_fib_entry *fib_entry;
1937 	struct mlxsw_sp_fib_node *fib_node;
1938 
1939 	fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info);
1940 	if (IS_ERR(fib_node))
1941 		return NULL;
1942 
1943 	list_for_each_entry(fib_entry, &fib_node->entry_list, list) {
1944 		if (fib_entry->params.tb_id == fen_info->tb_id &&
1945 		    fib_entry->params.tos == fen_info->tos &&
1946 		    fib_entry->params.type == fen_info->type &&
1947 		    fib_entry->nh_group->key.fi == fen_info->fi) {
1948 			return fib_entry;
1949 		}
1950 	}
1951 
1952 	return NULL;
1953 }
1954 
/* FIB nodes are hashed by their {addr, prefix_len} key. */
static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_fib_node, key),
	.head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
	.key_len = sizeof(struct mlxsw_sp_fib_key),
	.automatic_shrinking = true,
};
1961 
1962 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
1963 				    struct mlxsw_sp_fib_node *fib_node)
1964 {
1965 	return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
1966 				      mlxsw_sp_fib_ht_params);
1967 }
1968 
1969 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
1970 				     struct mlxsw_sp_fib_node *fib_node)
1971 {
1972 	rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
1973 			       mlxsw_sp_fib_ht_params);
1974 }
1975 
1976 static struct mlxsw_sp_fib_node *
1977 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1978 			 size_t addr_len, unsigned char prefix_len)
1979 {
1980 	struct mlxsw_sp_fib_key key;
1981 
1982 	memset(&key, 0, sizeof(key));
1983 	memcpy(key.addr, addr, addr_len);
1984 	key.prefix_len = prefix_len;
1985 	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
1986 }
1987 
1988 static struct mlxsw_sp_fib_node *
1989 mlxsw_sp_fib_node_create(struct mlxsw_sp_vr *vr, const void *addr,
1990 			 size_t addr_len, unsigned char prefix_len)
1991 {
1992 	struct mlxsw_sp_fib_node *fib_node;
1993 
1994 	fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
1995 	if (!fib_node)
1996 		return NULL;
1997 
1998 	INIT_LIST_HEAD(&fib_node->entry_list);
1999 	list_add(&fib_node->list, &vr->fib->node_list);
2000 	memcpy(fib_node->key.addr, addr, addr_len);
2001 	fib_node->key.prefix_len = prefix_len;
2002 	mlxsw_sp_fib_node_insert(vr->fib, fib_node);
2003 	fib_node->vr = vr;
2004 
2005 	return fib_node;
2006 }
2007 
2008 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
2009 {
2010 	mlxsw_sp_fib_node_remove(fib_node->vr->fib, fib_node);
2011 	list_del(&fib_node->list);
2012 	WARN_ON(!list_empty(&fib_node->entry_list));
2013 	kfree(fib_node);
2014 }
2015 
2016 static bool
2017 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
2018 				 const struct mlxsw_sp_fib_entry *fib_entry)
2019 {
2020 	return list_first_entry(&fib_node->entry_list,
2021 				struct mlxsw_sp_fib_entry, list) == fib_entry;
2022 }
2023 
2024 static void mlxsw_sp_fib_node_prefix_inc(struct mlxsw_sp_fib_node *fib_node)
2025 {
2026 	unsigned char prefix_len = fib_node->key.prefix_len;
2027 	struct mlxsw_sp_fib *fib = fib_node->vr->fib;
2028 
2029 	if (fib->prefix_ref_count[prefix_len]++ == 0)
2030 		mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len);
2031 }
2032 
2033 static void mlxsw_sp_fib_node_prefix_dec(struct mlxsw_sp_fib_node *fib_node)
2034 {
2035 	unsigned char prefix_len = fib_node->key.prefix_len;
2036 	struct mlxsw_sp_fib *fib = fib_node->vr->fib;
2037 
2038 	if (--fib->prefix_ref_count[prefix_len] == 0)
2039 		mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len);
2040 }
2041 
/* Look up or create the FIB node for the IPv4 route described by
 * @fen_info, resolving its virtual router first. Returns an ERR_PTR on
 * failure. NOTE(review): the VR obtained here appears to be released by
 * mlxsw_sp_fib4_node_put() once the node's entry list drains — confirm
 * against mlxsw_sp_vr_get()/mlxsw_sp_vr_put() semantics.
 */
static struct mlxsw_sp_fib_node *
mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp,
		       const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib_node *fib_node;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->dst_len, fen_info->tb_id,
			     MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(vr))
		return ERR_CAST(vr);

	/* Reuse an existing node for the same {dst, dst_len} if present. */
	fib_node = mlxsw_sp_fib_node_lookup(vr->fib, &fen_info->dst,
					    sizeof(fen_info->dst),
					    fen_info->dst_len);
	if (fib_node)
		return fib_node;

	fib_node = mlxsw_sp_fib_node_create(vr, &fen_info->dst,
					    sizeof(fen_info->dst),
					    fen_info->dst_len);
	if (!fib_node) {
		err = -ENOMEM;
		goto err_fib_node_create;
	}

	return fib_node;

err_fib_node_create:
	mlxsw_sp_vr_put(mlxsw_sp, vr);
	return ERR_PTR(err);
}
2075 
2076 static void mlxsw_sp_fib4_node_put(struct mlxsw_sp *mlxsw_sp,
2077 				   struct mlxsw_sp_fib_node *fib_node)
2078 {
2079 	struct mlxsw_sp_vr *vr = fib_node->vr;
2080 
2081 	if (!list_empty(&fib_node->entry_list))
2082 		return;
2083 	mlxsw_sp_fib_node_destroy(fib_node);
2084 	mlxsw_sp_vr_put(mlxsw_sp, vr);
2085 }
2086 
/* Find the first entry in the node's list before which an entry with
 * @params should be inserted. The list is kept sorted by descending
 * table ID, then descending TOS, then descending priority. Returns NULL
 * if the new entry belongs after all existing entries of its table.
 */
static struct mlxsw_sp_fib_entry *
mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
			      const struct mlxsw_sp_fib_entry_params *params)
{
	struct mlxsw_sp_fib_entry *fib_entry;

	list_for_each_entry(fib_entry, &fib_node->entry_list, list) {
		/* Skip entries from higher-numbered tables. */
		if (fib_entry->params.tb_id > params->tb_id)
			continue;
		/* Past the matching table; no insertion point found. */
		if (fib_entry->params.tb_id != params->tb_id)
			break;
		/* Within the table, skip entries with a higher TOS. */
		if (fib_entry->params.tos > params->tos)
			continue;
		/* First entry that sorts at or after the new one. */
		if (fib_entry->params.prio >= params->prio ||
		    fib_entry->params.tos < params->tos)
			return fib_entry;
	}

	return NULL;
}
2107 
/* Append @new_entry after the run of entries that share its table ID,
 * TOS and priority, starting the scan at @fib_entry. Used for
 * FIB_EVENT_ENTRY_APPEND, where an equivalent entry must already exist.
 */
static int mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib_entry *fib_entry,
					  struct mlxsw_sp_fib_entry *new_entry)
{
	struct mlxsw_sp_fib_node *fib_node;

	/* An append without a matching existing entry is unexpected. */
	if (WARN_ON(!fib_entry))
		return -EINVAL;

	fib_node = fib_entry->fib_node;
	list_for_each_entry_from(fib_entry, &fib_node->entry_list, list) {
		if (fib_entry->params.tb_id != new_entry->params.tb_id ||
		    fib_entry->params.tos != new_entry->params.tos ||
		    fib_entry->params.prio != new_entry->params.prio)
			break;
	}

	/* If the loop ran off the end, &fib_entry->list is the list head
	 * itself, so list_add_tail() appends at the end of the list.
	 */
	list_add_tail(&new_entry->list, &fib_entry->list);
	return 0;
}
2127 
/* Insert @new_entry into the node's entry list at the position dictated
 * by its {tb_id, tos, prio}, preserving the list's descending sort
 * order. @replace and @append mirror the FIB_EVENT_ENTRY_* semantics.
 */
static int
mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib_node *fib_node,
			       struct mlxsw_sp_fib_entry *new_entry,
			       bool replace, bool append)
{
	struct mlxsw_sp_fib_entry *fib_entry;

	fib_entry = mlxsw_sp_fib4_node_entry_find(fib_node, &new_entry->params);

	if (append)
		return mlxsw_sp_fib4_node_list_append(fib_entry, new_entry);
	/* A replace requires an existing entry to replace. */
	if (replace && WARN_ON(!fib_entry))
		return -EINVAL;

	/* Insert new entry before replaced one, so that we can later
	 * remove the second.
	 */
	if (fib_entry) {
		list_add_tail(&new_entry->list, &fib_entry->list);
	} else {
		struct mlxsw_sp_fib_entry *last;

		/* No insertion point found: place the new entry after the
		 * last entry whose table ID is >= the new entry's, keeping
		 * the descending tb_id order intact.
		 */
		list_for_each_entry(last, &fib_node->entry_list, list) {
			if (new_entry->params.tb_id > last->params.tb_id)
				break;
			fib_entry = last;
		}

		if (fib_entry)
			list_add(&new_entry->list, &fib_entry->list);
		else
			list_add(&new_entry->list, &fib_node->entry_list);
	}

	return 0;
}
2164 
/* Unlink an entry from its node's entry list. Counterpart of
 * mlxsw_sp_fib4_node_list_insert().
 */
static void
mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib_entry *fib_entry)
{
	list_del(&fib_entry->list);
}
2170 
/* Program @fib_entry into the device, but only if it is now the first
 * entry of its node — only the first entry is offloaded. Must be called
 * after the entry was linked into the node's entry list.
 */
static int
mlxsw_sp_fib4_node_entry_add(struct mlxsw_sp *mlxsw_sp,
			     const struct mlxsw_sp_fib_node *fib_node,
			     struct mlxsw_sp_fib_entry *fib_entry)
{
	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
		return 0;

	/* To prevent packet loss, overwrite the previously offloaded
	 * entry.
	 */
	if (!list_is_singular(&fib_node->entry_list)) {
		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);

		/* The former first entry is no longer offloaded; refresh
		 * its offload indication accordingly.
		 */
		mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
	}

	return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
}
2191 
/* Remove @fib_entry from the device. Only relevant if it is the first
 * entry of its node, since only the first entry is offloaded. Must be
 * called before the entry is unlinked from the node's entry list.
 */
static void
mlxsw_sp_fib4_node_entry_del(struct mlxsw_sp *mlxsw_sp,
			     const struct mlxsw_sp_fib_node *fib_node,
			     struct mlxsw_sp_fib_entry *fib_entry)
{
	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
		return;

	/* Promote the next entry by overwriting the deleted entry */
	if (!list_is_singular(&fib_node->entry_list)) {
		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;

		/* Overwriting instead of delete-then-add avoids a window
		 * with no offloaded route for this prefix.
		 */
		mlxsw_sp_fib_entry_update(mlxsw_sp, n);
		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
		return;
	}

	mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
}
2212 
/* Link @fib_entry into its node: insert it into the sorted entry list,
 * offload it if it became the first entry, and account its prefix
 * length. Unwinds the list insertion on offload failure.
 */
static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib_entry *fib_entry,
					 bool replace, bool append)
{
	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
	int err;

	err = mlxsw_sp_fib4_node_list_insert(fib_node, fib_entry, replace,
					     append);
	if (err)
		return err;

	err = mlxsw_sp_fib4_node_entry_add(mlxsw_sp, fib_node, fib_entry);
	if (err)
		goto err_fib4_node_entry_add;

	mlxsw_sp_fib_node_prefix_inc(fib_node);

	return 0;

err_fib4_node_entry_add:
	mlxsw_sp_fib4_node_list_remove(fib_entry);
	return err;
}
2237 
/* Undo mlxsw_sp_fib4_node_entry_link() in reverse order: drop the
 * prefix accounting, remove the entry from the device (promoting its
 * successor if needed), then unlink it from the entry list. The device
 * removal must happen while the entry is still list-linked, since
 * entry_del() inspects the entry's position in the list.
 */
static void
mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;

	mlxsw_sp_fib_node_prefix_dec(fib_node);
	mlxsw_sp_fib4_node_entry_del(mlxsw_sp, fib_node, fib_entry);
	mlxsw_sp_fib4_node_list_remove(fib_entry);
}
2248 
/* Complete a FIB_EVENT_ENTRY_REPLACE: after the new entry was inserted
 * just before the entry it replaces, unlink and destroy the replaced
 * one. No-op unless @replace is set.
 */
static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry,
					bool replace)
{
	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
	struct mlxsw_sp_fib_entry *replaced;

	if (!replace)
		return;

	/* We inserted the new entry before replaced one */
	replaced = list_next_entry(fib_entry, list);

	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
	/* Drop the node reference held on behalf of the replaced entry. */
	mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
}
2266 
/* Handle an IPv4 route add/replace/append notification: resolve the
 * FIB node, create the entry, and link (offload) it. Returns 0 when the
 * router is in aborted state, since nothing is offloaded then.
 */
static int
mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
			 const struct fib_entry_notifier_info *fen_info,
			 bool replace, bool append)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_fib_node *fib_node;
	int err;

	if (mlxsw_sp->router.aborted)
		return 0;

	fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info);
	if (IS_ERR(fib_node)) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
		return PTR_ERR(fib_node);
	}

	fib_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
	if (IS_ERR(fib_entry)) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
		err = PTR_ERR(fib_entry);
		goto err_fib4_entry_create;
	}

	err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib_entry, replace,
					    append);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
		goto err_fib4_node_entry_link;
	}

	/* On replace, dispose of the entry the new one superseded. */
	mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib_entry, replace);

	return 0;

err_fib4_node_entry_link:
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
err_fib4_entry_create:
	mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
	return err;
}
2309 
/* Handle an IPv4 route delete notification: find the matching entry,
 * unlink it from the device and node, destroy it, and release the node
 * (which frees it when its entry list drains).
 */
static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
				     struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_fib_node *fib_node;

	/* In aborted state nothing was offloaded, so nothing to delete. */
	if (mlxsw_sp->router.aborted)
		return;

	fib_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
	if (WARN_ON(!fib_entry))
		return;
	/* Cache the node pointer before the entry is destroyed. */
	fib_node = fib_entry->fib_node;

	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry);
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
	mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
}
2328 
/* Install a catch-all trap after FIB abort: allocate the minimal LPM
 * tree, set up its structure, bind it to virtual router 0 and write a
 * default (0.0.0.0/0) route whose action traps packets to the CPU, so
 * the kernel keeps forwarding in software. The register writes must be
 * issued in this order; each step depends on the previous one.
 */
static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];
	char ralst_pl[MLXSW_REG_RALST_LEN];
	char raltb_pl[MLXSW_REG_RALTB_LEN];
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	int err;

	/* Allocate the LPM tree. */
	mlxsw_reg_ralta_pack(ralta_pl, true, MLXSW_REG_RALXX_PROTOCOL_IPV4,
			     MLXSW_SP_LPM_TREE_MIN);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
	if (err)
		return err;

	/* Set the tree structure. */
	mlxsw_reg_ralst_pack(ralst_pl, 0xff, MLXSW_SP_LPM_TREE_MIN);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
	if (err)
		return err;

	/* Bind the tree to virtual router 0. */
	mlxsw_reg_raltb_pack(raltb_pl, 0, MLXSW_REG_RALXX_PROTOCOL_IPV4,
			     MLXSW_SP_LPM_TREE_MIN);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
	if (err)
		return err;

	/* Write the default route with an ip2me (trap-to-CPU) action. */
	mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4,
			      MLXSW_REG_RALUE_OP_WRITE_WRITE, 0, 0, 0);
	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
2359 
/* Destroy all IPv4 entries of a FIB node. The node itself is freed by
 * mlxsw_sp_fib4_node_put() when its last entry is removed, which is why
 * the loop checks for termination before touching the node again.
 */
static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_fib_entry *fib_entry, *tmp;

	list_for_each_entry_safe(fib_entry, tmp, &fib_node->entry_list, list) {
		/* True on the last iteration, i.e. when tmp wrapped to the
		 * list head and the node is about to be freed below.
		 */
		bool do_break = &tmp->list == &fib_node->entry_list;

		mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry);
		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
		mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
		/* Break when entry list is empty and node was freed.
		 * Otherwise, we'll access freed memory in the next
		 * iteration.
		 */
		if (do_break)
			break;
	}
}
2379 
/* Dispatch a FIB node flush by L3 protocol. IPv6 is not supported by
 * this driver version, hence the warning.
 */
static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_fib_node *fib_node)
{
	switch (fib_node->vr->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		WARN_ON_ONCE(1);
		break;
	}
}
2392 
/* Flush every FIB node of every in-use virtual router. Uses the same
 * free-during-iteration guard as mlxsw_sp_fib4_node_flush(): flushing a
 * node may free it, so detect the last iteration before it happens.
 */
static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_fib_node *fib_node, *tmp;
	struct mlxsw_sp_vr *vr;
	int i;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router.vrs[i];

		if (!vr->used)
			continue;

		list_for_each_entry_safe(fib_node, tmp, &vr->fib->node_list,
					 list) {
			/* True when this is the last node of the VR. */
			bool do_break = &tmp->list == &vr->fib->node_list;

			mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
			if (do_break)
				break;
		}
	}
}
2415 
/* Abort FIB offloading: flush all offloaded routes, mark the router as
 * aborted (so later FIB events become no-ops) and install a trap so
 * traffic is forwarded by the kernel instead. Idempotent.
 */
static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp)
{
	int err;

	if (mlxsw_sp->router.aborted)
		return;
	dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
	mlxsw_sp_router_fib_flush(mlxsw_sp);
	mlxsw_sp->router.aborted = true;
	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
	if (err)
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
}
2429 
/* Disable a router interface (RIF) in the device via a read-modify-write
 * of the RITR register: query the current configuration, clear the
 * enable bit, and write it back.
 */
static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
{
	char ritr_pl[MLXSW_REG_RITR_LEN];
	int err;

	mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
	if (WARN_ON_ONCE(err))
		return err;

	mlxsw_reg_ritr_enable_set(ritr_pl, false);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
2443 
/* Synchronize router state after a RIF went away: disable it in the
 * device first so no new traffic uses it, then tear down the nexthops
 * and neighbour entries that referenced it.
 */
void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_rif *r)
{
	mlxsw_sp_router_rif_disable(mlxsw_sp, r->rif);
	mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, r);
	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, r);
}
2451 
/* Low-level router init: allocate the RIF pointer array and enable the
 * device's router via the RGCR register. Returns -EIO when the MAX_RIFS
 * resource is not advertised by the device.
 */
static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
	char rgcr_pl[MLXSW_REG_RGCR_LEN];
	u64 max_rifs;
	int err;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
		return -EIO;

	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
	/* kcalloc() zeroes the array, so all RIF slots start unused. */
	mlxsw_sp->rifs = kcalloc(max_rifs, sizeof(struct mlxsw_sp_rif *),
				 GFP_KERNEL);
	if (!mlxsw_sp->rifs)
		return -ENOMEM;

	mlxsw_reg_rgcr_pack(rgcr_pl, true);
	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
	if (err)
		goto err_rgcr_fail;

	return 0;

err_rgcr_fail:
	kfree(mlxsw_sp->rifs);
	return err;
}
2479 
/* Low-level router teardown: disable the device's router and free the
 * RIF array, warning about any RIF slot still in use at this point.
 */
static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	char rgcr_pl[MLXSW_REG_RGCR_LEN];
	int i;

	mlxsw_reg_rgcr_pack(rgcr_pl, false);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);

	/* All RIFs should have been released by now. */
	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
		WARN_ON_ONCE(mlxsw_sp->rifs[i]);

	kfree(mlxsw_sp->rifs);
}
2493 
/* Deferred-work context for a FIB notifier event. The notifier runs in
 * atomic context, so the event payload is copied here and processed
 * later from process context.
 */
struct mlxsw_sp_fib_event_work {
	struct work_struct work;
	/* Only one member is valid, selected by 'event'. */
	union {
		struct fib_entry_notifier_info fen_info;
		struct fib_nh_notifier_info fnh_info;
	};
	struct mlxsw_sp *mlxsw_sp;
	unsigned long event;
};
2503 
/* Process-context handler for a queued FIB event. Releases the fib_info
 * reference taken in mlxsw_sp_router_fib_event() and frees the work
 * item when done.
 */
static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
{
	struct mlxsw_sp_fib_event_work *fib_work =
		container_of(work, struct mlxsw_sp_fib_event_work, work);
	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
	bool replace, append;
	int err;

	/* Protect internal structures from changes */
	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_APPEND: /* fall through */
	case FIB_EVENT_ENTRY_ADD:
		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
		append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
		err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
					       replace, append);
		/* Any offload failure aborts FIB offloading entirely. */
		if (err)
			mlxsw_sp_router_fib4_abort(mlxsw_sp);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_ENTRY_DEL:
		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_RULE_ADD: /* fall through */
	case FIB_EVENT_RULE_DEL:
		/* Custom FIB rules are not offloaded; abort to software. */
		mlxsw_sp_router_fib4_abort(mlxsw_sp);
		break;
	case FIB_EVENT_NH_ADD: /* fall through */
	case FIB_EVENT_NH_DEL:
		mlxsw_sp_nexthop_event(mlxsw_sp, fib_work->event,
				       fib_work->fnh_info.fib_nh);
		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	}
	rtnl_unlock();
	kfree(fib_work);
}
2544 
2545 /* Called with rcu_read_lock() */
2546 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
2547 				     unsigned long event, void *ptr)
2548 {
2549 	struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
2550 	struct mlxsw_sp_fib_event_work *fib_work;
2551 	struct fib_notifier_info *info = ptr;
2552 
2553 	if (!net_eq(info->net, &init_net))
2554 		return NOTIFY_DONE;
2555 
2556 	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
2557 	if (WARN_ON(!fib_work))
2558 		return NOTIFY_BAD;
2559 
2560 	INIT_WORK(&fib_work->work, mlxsw_sp_router_fib_event_work);
2561 	fib_work->mlxsw_sp = mlxsw_sp;
2562 	fib_work->event = event;
2563 
2564 	switch (event) {
2565 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
2566 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
2567 	case FIB_EVENT_ENTRY_ADD: /* fall through */
2568 	case FIB_EVENT_ENTRY_DEL:
2569 		memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info));
2570 		/* Take referece on fib_info to prevent it from being
2571 		 * freed while work is queued. Release it afterwards.
2572 		 */
2573 		fib_info_hold(fib_work->fen_info.fi);
2574 		break;
2575 	case FIB_EVENT_NH_ADD: /* fall through */
2576 	case FIB_EVENT_NH_DEL:
2577 		memcpy(&fib_work->fnh_info, ptr, sizeof(fib_work->fnh_info));
2578 		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
2579 		break;
2580 	}
2581 
2582 	mlxsw_core_schedule_work(&fib_work->work);
2583 
2584 	return NOTIFY_DONE;
2585 }
2586 
/* Callback invoked by the FIB notifier core before replaying a FIB dump
 * to this driver.
 */
static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
{
	struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);

	/* Flush pending FIB notifications and then flush the device's
	 * table before requesting another dump. The FIB notification
	 * block is unregistered, so no need to take RTNL.
	 */
	mlxsw_core_flush_owq();
	mlxsw_sp_router_fib_flush(mlxsw_sp);
}
2598 
/* Initialize the router subsystem: hardware router, nexthop hash
 * tables, LPM trees, virtual routers, neighbour handling, and finally
 * the FIB notifier (which may immediately replay existing routes).
 * Error paths unwind in exact reverse order of initialization.
 */
int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
	int err;

	INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list);
	err = __mlxsw_sp_router_init(mlxsw_sp);
	if (err)
		return err;

	err = rhashtable_init(&mlxsw_sp->router.nexthop_ht,
			      &mlxsw_sp_nexthop_ht_params);
	if (err)
		goto err_nexthop_ht_init;

	err = rhashtable_init(&mlxsw_sp->router.nexthop_group_ht,
			      &mlxsw_sp_nexthop_group_ht_params);
	if (err)
		goto err_nexthop_group_ht_init;

	mlxsw_sp_lpm_init(mlxsw_sp);
	err = mlxsw_sp_vrs_init(mlxsw_sp);
	if (err)
		goto err_vrs_init;

	err = mlxsw_sp_neigh_init(mlxsw_sp);
	if (err)
		goto err_neigh_init;

	/* Register last: the notifier may start delivering events (and a
	 * FIB dump) as soon as registration succeeds.
	 */
	mlxsw_sp->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
	err = register_fib_notifier(&mlxsw_sp->fib_nb,
				    mlxsw_sp_router_fib_dump_flush);
	if (err)
		goto err_register_fib_notifier;

	return 0;

err_register_fib_notifier:
	mlxsw_sp_neigh_fini(mlxsw_sp);
err_neigh_init:
	mlxsw_sp_vrs_fini(mlxsw_sp);
err_vrs_init:
	rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht);
err_nexthop_group_ht_init:
	rhashtable_destroy(&mlxsw_sp->router.nexthop_ht);
err_nexthop_ht_init:
	__mlxsw_sp_router_fini(mlxsw_sp);
	return err;
}
2647 
/* Tear down the router subsystem in exact reverse order of
 * mlxsw_sp_router_init(). Unregistering the notifier first stops new
 * FIB events from arriving during teardown.
 */
void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	unregister_fib_notifier(&mlxsw_sp->fib_nb);
	mlxsw_sp_neigh_fini(mlxsw_sp);
	mlxsw_sp_vrs_fini(mlxsw_sp);
	rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht);
	rhashtable_destroy(&mlxsw_sp->router.nexthop_ht);
	__mlxsw_sp_router_fini(mlxsw_sp);
}
2657