1 /*
2  * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3  * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
4  * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
5  * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
6  * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the names of the copyright holders nor the names of its
17  *    contributors may be used to endorse or promote products derived from
18  *    this software without specific prior written permission.
19  *
20  * Alternatively, this software may be distributed under the terms of the
21  * GNU General Public License ("GPL") version 2 as published by the Free
22  * Software Foundation.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 
37 #include <linux/kernel.h>
38 #include <linux/types.h>
39 #include <linux/rhashtable.h>
40 #include <linux/bitops.h>
41 #include <linux/in6.h>
42 #include <linux/notifier.h>
43 #include <linux/inetdevice.h>
44 #include <net/netevent.h>
45 #include <net/neighbour.h>
46 #include <net/arp.h>
47 #include <net/ip_fib.h>
48 
49 #include "spectrum.h"
50 #include "core.h"
51 #include "reg.h"
52 
/* Walk every bit that is set in (prefix_usage)->b, limited to the first
 * MLXSW_SP_PREFIX_COUNT bits, storing the bit index in @prefix on each
 * iteration. Thin wrapper around for_each_set_bit().
 */
#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
55 
56 static bool
57 mlxsw_sp_prefix_usage_subset(struct mlxsw_sp_prefix_usage *prefix_usage1,
58 			     struct mlxsw_sp_prefix_usage *prefix_usage2)
59 {
60 	unsigned char prefix;
61 
62 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage1) {
63 		if (!test_bit(prefix, prefix_usage2->b))
64 			return false;
65 	}
66 	return true;
67 }
68 
69 static bool
70 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
71 			 struct mlxsw_sp_prefix_usage *prefix_usage2)
72 {
73 	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
74 }
75 
76 static bool
77 mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage)
78 {
79 	struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 } };
80 
81 	return mlxsw_sp_prefix_usage_eq(prefix_usage, &prefix_usage_none);
82 }
83 
84 static void
85 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
86 			  struct mlxsw_sp_prefix_usage *prefix_usage2)
87 {
88 	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
89 }
90 
91 static void
92 mlxsw_sp_prefix_usage_zero(struct mlxsw_sp_prefix_usage *prefix_usage)
93 {
94 	memset(prefix_usage, 0, sizeof(*prefix_usage));
95 }
96 
97 static void
98 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
99 			  unsigned char prefix_len)
100 {
101 	set_bit(prefix_len, prefix_usage->b);
102 }
103 
104 static void
105 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
106 			    unsigned char prefix_len)
107 {
108 	clear_bit(prefix_len, prefix_usage->b);
109 }
110 
/* Key embedded in every struct mlxsw_sp_fib_node: an address buffer plus
 * the prefix length.
 */
struct mlxsw_sp_fib_key {
	/* Buffer is as large as a struct in6_addr. */
	unsigned char addr[sizeof(struct in6_addr)];
	unsigned char prefix_len;
};
115 
/* Kind of a FIB entry, stored in struct mlxsw_sp_fib_entry::type.
 * NOTE(review): the exact handling of each kind is implemented outside
 * the part of the file visible here.
 */
enum mlxsw_sp_fib_entry_type {
	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
};
121 
122 struct mlxsw_sp_nexthop_group;
123 
/* A FIB node, identified by @key and pointing back at its virtual
 * router through @vr.
 */
struct mlxsw_sp_fib_node {
	/* Presumably the struct mlxsw_sp_fib_entry items of this node -
	 * TODO confirm against the users of this list.
	 */
	struct list_head entry_list;
	/* Presumably linkage into struct mlxsw_sp_fib::node_list -
	 * TODO confirm.
	 */
	struct list_head list;
	struct rhash_head ht_node; /* rhashtable linkage */
	struct mlxsw_sp_vr *vr;
	struct mlxsw_sp_fib_key key;
};
131 
/* Parameters kept with each FIB entry (see the params member of
 * struct mlxsw_sp_fib_entry).
 * NOTE(review): the field names suggest they mirror the kernel route's
 * table ID, priority, TOS and type - confirm against the code that
 * fills them in.
 */
struct mlxsw_sp_fib_entry_params {
	u32 tb_id;
	u32 prio;
	u8 tos;
	u8 type;
};
138 
/* A single FIB entry. It points back at the FIB node it belongs to and,
 * optionally, at a nexthop group.
 */
struct mlxsw_sp_fib_entry {
	/* Presumably linkage into struct mlxsw_sp_fib_node::entry_list -
	 * TODO confirm.
	 */
	struct list_head list;
	struct mlxsw_sp_fib_node *fib_node;
	enum mlxsw_sp_fib_entry_type type;
	/* Presumably linkage into a list kept by @nh_group - TODO confirm. */
	struct list_head nexthop_group_node;
	struct mlxsw_sp_nexthop_group *nh_group;
	struct mlxsw_sp_fib_entry_params params;
	/* NOTE(review): looks like a flag tracking whether the entry was
	 * programmed to the device - confirm against its writers.
	 */
	bool offloaded;
};
148 
/* FIB state attached to a virtual router (vr->fib). Created by
 * mlxsw_sp_fib_create() and released by mlxsw_sp_fib_destroy().
 */
struct mlxsw_sp_fib {
	struct rhashtable ht; /* initialized with mlxsw_sp_fib_ht_params */
	struct list_head node_list; /* must be empty when the FIB is destroyed */
	/* One counter per prefix length; presumably the number of nodes
	 * using that length - TODO confirm against the code updating it.
	 */
	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
	/* Prefix lengths currently in use; consulted when choosing or
	 * replacing the virtual router's LPM tree.
	 */
	struct mlxsw_sp_prefix_usage prefix_usage;
};
155 
156 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
157 
158 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(void)
159 {
160 	struct mlxsw_sp_fib *fib;
161 	int err;
162 
163 	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
164 	if (!fib)
165 		return ERR_PTR(-ENOMEM);
166 	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
167 	if (err)
168 		goto err_rhashtable_init;
169 	INIT_LIST_HEAD(&fib->node_list);
170 	return fib;
171 
172 err_rhashtable_init:
173 	kfree(fib);
174 	return ERR_PTR(err);
175 }
176 
177 static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib)
178 {
179 	WARN_ON(!list_empty(&fib->node_list));
180 	rhashtable_destroy(&fib->ht);
181 	kfree(fib);
182 }
183 
184 static struct mlxsw_sp_lpm_tree *
185 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp, bool one_reserved)
186 {
187 	static struct mlxsw_sp_lpm_tree *lpm_tree;
188 	int i;
189 
190 	for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
191 		lpm_tree = &mlxsw_sp->router.lpm_trees[i];
192 		if (lpm_tree->ref_count == 0) {
193 			if (one_reserved)
194 				one_reserved = false;
195 			else
196 				return lpm_tree;
197 		}
198 	}
199 	return NULL;
200 }
201 
202 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
203 				   struct mlxsw_sp_lpm_tree *lpm_tree)
204 {
205 	char ralta_pl[MLXSW_REG_RALTA_LEN];
206 
207 	mlxsw_reg_ralta_pack(ralta_pl, true,
208 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
209 			     lpm_tree->id);
210 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
211 }
212 
213 static int mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
214 				  struct mlxsw_sp_lpm_tree *lpm_tree)
215 {
216 	char ralta_pl[MLXSW_REG_RALTA_LEN];
217 
218 	mlxsw_reg_ralta_pack(ralta_pl, false,
219 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
220 			     lpm_tree->id);
221 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
222 }
223 
224 static int
225 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
226 				  struct mlxsw_sp_prefix_usage *prefix_usage,
227 				  struct mlxsw_sp_lpm_tree *lpm_tree)
228 {
229 	char ralst_pl[MLXSW_REG_RALST_LEN];
230 	u8 root_bin = 0;
231 	u8 prefix;
232 	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
233 
234 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
235 		root_bin = prefix;
236 
237 	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
238 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
239 		if (prefix == 0)
240 			continue;
241 		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
242 					 MLXSW_REG_RALST_BIN_NO_CHILD);
243 		last_prefix = prefix;
244 	}
245 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
246 }
247 
248 static struct mlxsw_sp_lpm_tree *
249 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
250 			 struct mlxsw_sp_prefix_usage *prefix_usage,
251 			 enum mlxsw_sp_l3proto proto, bool one_reserved)
252 {
253 	struct mlxsw_sp_lpm_tree *lpm_tree;
254 	int err;
255 
256 	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp, one_reserved);
257 	if (!lpm_tree)
258 		return ERR_PTR(-EBUSY);
259 	lpm_tree->proto = proto;
260 	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
261 	if (err)
262 		return ERR_PTR(err);
263 
264 	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
265 						lpm_tree);
266 	if (err)
267 		goto err_left_struct_set;
268 	memcpy(&lpm_tree->prefix_usage, prefix_usage,
269 	       sizeof(lpm_tree->prefix_usage));
270 	return lpm_tree;
271 
272 err_left_struct_set:
273 	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
274 	return ERR_PTR(err);
275 }
276 
/* Counterpart of mlxsw_sp_lpm_tree_create(): release the tree in the
 * device. Returns the result of mlxsw_sp_lpm_tree_free().
 */
static int mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_lpm_tree *lpm_tree)
{
	int rc = mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);

	return rc;
}
282 
283 static struct mlxsw_sp_lpm_tree *
284 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
285 		      struct mlxsw_sp_prefix_usage *prefix_usage,
286 		      enum mlxsw_sp_l3proto proto, bool one_reserved)
287 {
288 	struct mlxsw_sp_lpm_tree *lpm_tree;
289 	int i;
290 
291 	for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
292 		lpm_tree = &mlxsw_sp->router.lpm_trees[i];
293 		if (lpm_tree->ref_count != 0 &&
294 		    lpm_tree->proto == proto &&
295 		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
296 					     prefix_usage))
297 			goto inc_ref_count;
298 	}
299 	lpm_tree = mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage,
300 					    proto, one_reserved);
301 	if (IS_ERR(lpm_tree))
302 		return lpm_tree;
303 
304 inc_ref_count:
305 	lpm_tree->ref_count++;
306 	return lpm_tree;
307 }
308 
309 static int mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
310 				 struct mlxsw_sp_lpm_tree *lpm_tree)
311 {
312 	if (--lpm_tree->ref_count == 0)
313 		return mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
314 	return 0;
315 }
316 
317 static void mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
318 {
319 	struct mlxsw_sp_lpm_tree *lpm_tree;
320 	int i;
321 
322 	for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
323 		lpm_tree = &mlxsw_sp->router.lpm_trees[i];
324 		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
325 	}
326 }
327 
328 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
329 {
330 	struct mlxsw_sp_vr *vr;
331 	int i;
332 
333 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
334 		vr = &mlxsw_sp->router.vrs[i];
335 		if (!vr->used)
336 			return vr;
337 	}
338 	return NULL;
339 }
340 
341 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
342 				     struct mlxsw_sp_vr *vr)
343 {
344 	char raltb_pl[MLXSW_REG_RALTB_LEN];
345 
346 	mlxsw_reg_raltb_pack(raltb_pl, vr->id,
347 			     (enum mlxsw_reg_ralxx_protocol) vr->proto,
348 			     vr->lpm_tree->id);
349 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
350 }
351 
352 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
353 				       struct mlxsw_sp_vr *vr)
354 {
355 	char raltb_pl[MLXSW_REG_RALTB_LEN];
356 
357 	/* Bind to tree 0 which is default */
358 	mlxsw_reg_raltb_pack(raltb_pl, vr->id,
359 			     (enum mlxsw_reg_ralxx_protocol) vr->proto, 0);
360 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
361 }
362 
363 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
364 {
365 	/* For our purpose, squash main and local table into one */
366 	if (tb_id == RT_TABLE_LOCAL)
367 		tb_id = RT_TABLE_MAIN;
368 	return tb_id;
369 }
370 
371 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
372 					    u32 tb_id,
373 					    enum mlxsw_sp_l3proto proto)
374 {
375 	struct mlxsw_sp_vr *vr;
376 	int i;
377 
378 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
379 
380 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
381 		vr = &mlxsw_sp->router.vrs[i];
382 		if (vr->used && vr->proto == proto && vr->tb_id == tb_id)
383 			return vr;
384 	}
385 	return NULL;
386 }
387 
388 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
389 					      unsigned char prefix_len,
390 					      u32 tb_id,
391 					      enum mlxsw_sp_l3proto proto)
392 {
393 	struct mlxsw_sp_prefix_usage req_prefix_usage;
394 	struct mlxsw_sp_lpm_tree *lpm_tree;
395 	struct mlxsw_sp_vr *vr;
396 	int err;
397 
398 	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
399 	if (!vr)
400 		return ERR_PTR(-EBUSY);
401 	vr->fib = mlxsw_sp_fib_create();
402 	if (IS_ERR(vr->fib))
403 		return ERR_CAST(vr->fib);
404 
405 	vr->proto = proto;
406 	vr->tb_id = tb_id;
407 	mlxsw_sp_prefix_usage_zero(&req_prefix_usage);
408 	mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
409 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
410 					 proto, true);
411 	if (IS_ERR(lpm_tree)) {
412 		err = PTR_ERR(lpm_tree);
413 		goto err_tree_get;
414 	}
415 	vr->lpm_tree = lpm_tree;
416 	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
417 	if (err)
418 		goto err_tree_bind;
419 
420 	vr->used = true;
421 	return vr;
422 
423 err_tree_bind:
424 	mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
425 err_tree_get:
426 	mlxsw_sp_fib_destroy(vr->fib);
427 
428 	return ERR_PTR(err);
429 }
430 
431 static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
432 				struct mlxsw_sp_vr *vr)
433 {
434 	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr);
435 	mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
436 	mlxsw_sp_fib_destroy(vr->fib);
437 	vr->used = false;
438 }
439 
440 static int
441 mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
442 			   struct mlxsw_sp_prefix_usage *req_prefix_usage)
443 {
444 	struct mlxsw_sp_lpm_tree *lpm_tree = vr->lpm_tree;
445 	struct mlxsw_sp_lpm_tree *new_tree;
446 	int err;
447 
448 	if (mlxsw_sp_prefix_usage_eq(req_prefix_usage, &lpm_tree->prefix_usage))
449 		return 0;
450 
451 	new_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage,
452 					 vr->proto, false);
453 	if (IS_ERR(new_tree)) {
454 		/* We failed to get a tree according to the required
455 		 * prefix usage. However, the current tree might be still good
456 		 * for us if our requirement is subset of the prefixes used
457 		 * in the tree.
458 		 */
459 		if (mlxsw_sp_prefix_usage_subset(req_prefix_usage,
460 						 &lpm_tree->prefix_usage))
461 			return 0;
462 		return PTR_ERR(new_tree);
463 	}
464 
465 	/* Prevent packet loss by overwriting existing binding */
466 	vr->lpm_tree = new_tree;
467 	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
468 	if (err)
469 		goto err_tree_bind;
470 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
471 
472 	return 0;
473 
474 err_tree_bind:
475 	vr->lpm_tree = lpm_tree;
476 	mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
477 	return err;
478 }
479 
480 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp,
481 					   unsigned char prefix_len,
482 					   u32 tb_id,
483 					   enum mlxsw_sp_l3proto proto)
484 {
485 	struct mlxsw_sp_vr *vr;
486 	int err;
487 
488 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
489 	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id, proto);
490 	if (!vr) {
491 		vr = mlxsw_sp_vr_create(mlxsw_sp, prefix_len, tb_id, proto);
492 		if (IS_ERR(vr))
493 			return vr;
494 	} else {
495 		struct mlxsw_sp_prefix_usage req_prefix_usage;
496 
497 		mlxsw_sp_prefix_usage_cpy(&req_prefix_usage,
498 					  &vr->fib->prefix_usage);
499 		mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
500 		/* Need to replace LPM tree in case new prefix is required. */
501 		err = mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
502 						 &req_prefix_usage);
503 		if (err)
504 			return ERR_PTR(err);
505 	}
506 	return vr;
507 }
508 
509 static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
510 {
511 	/* Destroy virtual router entity in case the associated FIB is empty
512 	 * and allow it to be used for other tables in future. Otherwise,
513 	 * check if some prefix usage did not disappear and change tree if
514 	 * that is the case. Note that in case new, smaller tree cannot be
515 	 * allocated, the original one will be kept being used.
516 	 */
517 	if (mlxsw_sp_prefix_usage_none(&vr->fib->prefix_usage))
518 		mlxsw_sp_vr_destroy(mlxsw_sp, vr);
519 	else
520 		mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
521 					   &vr->fib->prefix_usage);
522 }
523 
524 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
525 {
526 	struct mlxsw_sp_vr *vr;
527 	u64 max_vrs;
528 	int i;
529 
530 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
531 		return -EIO;
532 
533 	max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
534 	mlxsw_sp->router.vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
535 				       GFP_KERNEL);
536 	if (!mlxsw_sp->router.vrs)
537 		return -ENOMEM;
538 
539 	for (i = 0; i < max_vrs; i++) {
540 		vr = &mlxsw_sp->router.vrs[i];
541 		vr->id = i;
542 	}
543 
544 	return 0;
545 }
546 
547 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
548 
549 static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
550 {
551 	/* At this stage we're guaranteed not to have new incoming
552 	 * FIB notifications and the work queue is free from FIBs
553 	 * sitting on top of mlxsw netdevs. However, we can still
554 	 * have other FIBs queued. Flush the queue before flushing
555 	 * the device's tables. No need for locks, as we're the only
556 	 * writer.
557 	 */
558 	mlxsw_core_flush_owq();
559 	mlxsw_sp_router_fib_flush(mlxsw_sp);
560 	kfree(mlxsw_sp->router.vrs);
561 }
562 
/* Key of the neighbour hash table (see mlxsw_sp_neigh_ht_params): the
 * kernel neighbour pointer itself.
 */
struct mlxsw_sp_neigh_key {
	struct neighbour *n;
};
566 
/* Driver-side state kept for a kernel neighbour. */
struct mlxsw_sp_neigh_entry {
	struct list_head rif_list_node; /* member of the RIF's neigh_list */
	struct rhash_head ht_node; /* linkage in router.neigh_ht */
	struct mlxsw_sp_neigh_key key;
	u16 rif; /* RIF index taken from the RIF found for n->dev */
	bool connected; /* last state passed to mlxsw_sp_neigh_entry_update() */
	unsigned char ha[ETH_ALEN]; /* copy of the neighbour's hardware address */
	struct list_head nexthop_list; /* list of nexthops using
					* this neigh entry
					*/
	/* Node used when walking router.nexthop_neighs_list. */
	struct list_head nexthop_neighs_list_node;
};
579 
/* Parameters of router.neigh_ht: entries are struct mlxsw_sp_neigh_entry
 * objects hashed by their embedded struct mlxsw_sp_neigh_key.
 */
static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
	.key_len = sizeof(struct mlxsw_sp_neigh_key),
};
585 
586 static struct mlxsw_sp_neigh_entry *
587 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
588 			   u16 rif)
589 {
590 	struct mlxsw_sp_neigh_entry *neigh_entry;
591 
592 	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
593 	if (!neigh_entry)
594 		return NULL;
595 
596 	neigh_entry->key.n = n;
597 	neigh_entry->rif = rif;
598 	INIT_LIST_HEAD(&neigh_entry->nexthop_list);
599 
600 	return neigh_entry;
601 }
602 
/* Free the memory of a neighbour entry obtained from
 * mlxsw_sp_neigh_entry_alloc(). Does not unlink it from any list or
 * hash table.
 */
static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	kfree(neigh_entry);
}
607 
608 static int
609 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
610 			    struct mlxsw_sp_neigh_entry *neigh_entry)
611 {
612 	return rhashtable_insert_fast(&mlxsw_sp->router.neigh_ht,
613 				      &neigh_entry->ht_node,
614 				      mlxsw_sp_neigh_ht_params);
615 }
616 
617 static void
618 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
619 			    struct mlxsw_sp_neigh_entry *neigh_entry)
620 {
621 	rhashtable_remove_fast(&mlxsw_sp->router.neigh_ht,
622 			       &neigh_entry->ht_node,
623 			       mlxsw_sp_neigh_ht_params);
624 }
625 
626 static struct mlxsw_sp_neigh_entry *
627 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
628 {
629 	struct mlxsw_sp_neigh_entry *neigh_entry;
630 	struct mlxsw_sp_rif *r;
631 	int err;
632 
633 	r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
634 	if (!r)
635 		return ERR_PTR(-EINVAL);
636 
637 	neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, r->rif);
638 	if (!neigh_entry)
639 		return ERR_PTR(-ENOMEM);
640 
641 	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
642 	if (err)
643 		goto err_neigh_entry_insert;
644 
645 	list_add(&neigh_entry->rif_list_node, &r->neigh_list);
646 
647 	return neigh_entry;
648 
649 err_neigh_entry_insert:
650 	mlxsw_sp_neigh_entry_free(neigh_entry);
651 	return ERR_PTR(err);
652 }
653 
/* Counterpart of mlxsw_sp_neigh_entry_create(): unlink the entry from
 * its RIF's neighbour list, remove it from the hash table and free it.
 */
static void
mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
			     struct mlxsw_sp_neigh_entry *neigh_entry)
{
	list_del(&neigh_entry->rif_list_node);
	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_free(neigh_entry);
}
662 
663 static struct mlxsw_sp_neigh_entry *
664 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
665 {
666 	struct mlxsw_sp_neigh_key key;
667 
668 	key.n = n;
669 	return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht,
670 				      &key, mlxsw_sp_neigh_ht_params);
671 }
672 
673 static void
674 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
675 {
676 	unsigned long interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
677 
678 	mlxsw_sp->router.neighs_update.interval = jiffies_to_msecs(interval);
679 }
680 
681 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
682 						   char *rauhtd_pl,
683 						   int ent_index)
684 {
685 	struct net_device *dev;
686 	struct neighbour *n;
687 	__be32 dipn;
688 	u32 dip;
689 	u16 rif;
690 
691 	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
692 
693 	if (!mlxsw_sp->rifs[rif]) {
694 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
695 		return;
696 	}
697 
698 	dipn = htonl(dip);
699 	dev = mlxsw_sp->rifs[rif]->dev;
700 	n = neigh_lookup(&arp_tbl, &dipn, dev);
701 	if (!n) {
702 		netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n",
703 			   &dip);
704 		return;
705 	}
706 
707 	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
708 	neigh_event_send(n, NULL);
709 	neigh_release(n);
710 }
711 
712 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
713 						   char *rauhtd_pl,
714 						   int rec_index)
715 {
716 	u8 num_entries;
717 	int i;
718 
719 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
720 								rec_index);
721 	/* Hardware starts counting at 0, so add 1. */
722 	num_entries++;
723 
724 	/* Each record consists of several neighbour entries. */
725 	for (i = 0; i < num_entries; i++) {
726 		int ent_index;
727 
728 		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
729 		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
730 						       ent_index);
731 	}
732 
733 }
734 
735 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
736 					      char *rauhtd_pl, int rec_index)
737 {
738 	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
739 	case MLXSW_REG_RAUHTD_TYPE_IPV4:
740 		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
741 						       rec_index);
742 		break;
743 	case MLXSW_REG_RAUHTD_TYPE_IPV6:
744 		WARN_ON_ONCE(1);
745 		break;
746 	}
747 }
748 
749 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
750 {
751 	u8 num_rec, last_rec_index, num_entries;
752 
753 	num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
754 	last_rec_index = num_rec - 1;
755 
756 	if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
757 		return false;
758 	if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
759 	    MLXSW_REG_RAUHTD_TYPE_IPV6)
760 		return true;
761 
762 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
763 								last_rec_index);
764 	if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
765 		return true;
766 	return false;
767 }
768 
769 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
770 {
771 	char *rauhtd_pl;
772 	u8 num_rec;
773 	int i, err;
774 
775 	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
776 	if (!rauhtd_pl)
777 		return -ENOMEM;
778 
779 	/* Make sure the neighbour's netdev isn't removed in the
780 	 * process.
781 	 */
782 	rtnl_lock();
783 	do {
784 		mlxsw_reg_rauhtd_pack(rauhtd_pl, MLXSW_REG_RAUHTD_TYPE_IPV4);
785 		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
786 				      rauhtd_pl);
787 		if (err) {
788 			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour talbe\n");
789 			break;
790 		}
791 		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
792 		for (i = 0; i < num_rec; i++)
793 			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
794 							  i);
795 	} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
796 	rtnl_unlock();
797 
798 	kfree(rauhtd_pl);
799 	return err;
800 }
801 
802 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
803 {
804 	struct mlxsw_sp_neigh_entry *neigh_entry;
805 
806 	/* Take RTNL mutex here to prevent lists from changes */
807 	rtnl_lock();
808 	list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
809 			    nexthop_neighs_list_node)
810 		/* If this neigh have nexthops, make the kernel think this neigh
811 		 * is active regardless of the traffic.
812 		 */
813 		neigh_event_send(neigh_entry->key.n, NULL);
814 	rtnl_unlock();
815 }
816 
817 static void
818 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
819 {
820 	unsigned long interval = mlxsw_sp->router.neighs_update.interval;
821 
822 	mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw,
823 			       msecs_to_jiffies(interval));
824 }
825 
826 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
827 {
828 	struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
829 						 router.neighs_update.dw.work);
830 	int err;
831 
832 	err = mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp);
833 	if (err)
834 		dev_err(mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
835 
836 	mlxsw_sp_router_neighs_update_nh(mlxsw_sp);
837 
838 	mlxsw_sp_router_neighs_update_work_schedule(mlxsw_sp);
839 }
840 
841 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
842 {
843 	struct mlxsw_sp_neigh_entry *neigh_entry;
844 	struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
845 						 router.nexthop_probe_dw.work);
846 
847 	/* Iterate over nexthop neighbours, find those who are unresolved and
848 	 * send arp on them. This solves the chicken-egg problem when
849 	 * the nexthop wouldn't get offloaded until the neighbor is resolved
850 	 * but it wouldn't get resolved ever in case traffic is flowing in HW
851 	 * using different nexthop.
852 	 *
853 	 * Take RTNL mutex here to prevent lists from changes.
854 	 */
855 	rtnl_lock();
856 	list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
857 			    nexthop_neighs_list_node)
858 		if (!neigh_entry->connected)
859 			neigh_event_send(neigh_entry->key.n, NULL);
860 	rtnl_unlock();
861 
862 	mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw,
863 			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
864 }
865 
866 static void
867 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
868 			      struct mlxsw_sp_neigh_entry *neigh_entry,
869 			      bool removing);
870 
871 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
872 {
873 	return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
874 			MLXSW_REG_RAUHT_OP_WRITE_DELETE;
875 }
876 
877 static void
878 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
879 				struct mlxsw_sp_neigh_entry *neigh_entry,
880 				enum mlxsw_reg_rauht_op op)
881 {
882 	struct neighbour *n = neigh_entry->key.n;
883 	u32 dip = ntohl(*((__be32 *) n->primary_key));
884 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
885 
886 	mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
887 			      dip);
888 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
889 }
890 
891 static void
892 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
893 			    struct mlxsw_sp_neigh_entry *neigh_entry,
894 			    bool adding)
895 {
896 	if (!adding && !neigh_entry->connected)
897 		return;
898 	neigh_entry->connected = adding;
899 	if (neigh_entry->key.n->tbl == &arp_tbl)
900 		mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
901 						mlxsw_sp_rauht_op(adding));
902 	else
903 		WARN_ON_ONCE(1);
904 }
905 
/* Deferred-work context for a neighbour update event. Allocated by the
 * netevent notifier and freed by mlxsw_sp_router_neigh_event_work().
 */
struct mlxsw_sp_neigh_event_work {
	struct work_struct work;
	struct mlxsw_sp *mlxsw_sp;
	/* Reference taken with neigh_clone() when the work is scheduled
	 * and dropped with neigh_release() by the work handler.
	 */
	struct neighbour *n;
};
911 
912 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
913 {
914 	struct mlxsw_sp_neigh_event_work *neigh_work =
915 		container_of(work, struct mlxsw_sp_neigh_event_work, work);
916 	struct mlxsw_sp *mlxsw_sp = neigh_work->mlxsw_sp;
917 	struct mlxsw_sp_neigh_entry *neigh_entry;
918 	struct neighbour *n = neigh_work->n;
919 	unsigned char ha[ETH_ALEN];
920 	bool entry_connected;
921 	u8 nud_state, dead;
922 
923 	/* If these parameters are changed after we release the lock,
924 	 * then we are guaranteed to receive another event letting us
925 	 * know about it.
926 	 */
927 	read_lock_bh(&n->lock);
928 	memcpy(ha, n->ha, ETH_ALEN);
929 	nud_state = n->nud_state;
930 	dead = n->dead;
931 	read_unlock_bh(&n->lock);
932 
933 	rtnl_lock();
934 	entry_connected = nud_state & NUD_VALID && !dead;
935 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
936 	if (!entry_connected && !neigh_entry)
937 		goto out;
938 	if (!neigh_entry) {
939 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
940 		if (IS_ERR(neigh_entry))
941 			goto out;
942 	}
943 
944 	memcpy(neigh_entry->ha, ha, ETH_ALEN);
945 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
946 	mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);
947 
948 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
949 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
950 
951 out:
952 	rtnl_unlock();
953 	neigh_release(n);
954 	kfree(neigh_work);
955 }
956 
957 int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
958 				   unsigned long event, void *ptr)
959 {
960 	struct mlxsw_sp_neigh_event_work *neigh_work;
961 	struct mlxsw_sp_port *mlxsw_sp_port;
962 	struct mlxsw_sp *mlxsw_sp;
963 	unsigned long interval;
964 	struct neigh_parms *p;
965 	struct neighbour *n;
966 
967 	switch (event) {
968 	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
969 		p = ptr;
970 
971 		/* We don't care about changes in the default table. */
972 		if (!p->dev || p->tbl != &arp_tbl)
973 			return NOTIFY_DONE;
974 
975 		/* We are in atomic context and can't take RTNL mutex,
976 		 * so use RCU variant to walk the device chain.
977 		 */
978 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
979 		if (!mlxsw_sp_port)
980 			return NOTIFY_DONE;
981 
982 		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
983 		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
984 		mlxsw_sp->router.neighs_update.interval = interval;
985 
986 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
987 		break;
988 	case NETEVENT_NEIGH_UPDATE:
989 		n = ptr;
990 
991 		if (n->tbl != &arp_tbl)
992 			return NOTIFY_DONE;
993 
994 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
995 		if (!mlxsw_sp_port)
996 			return NOTIFY_DONE;
997 
998 		neigh_work = kzalloc(sizeof(*neigh_work), GFP_ATOMIC);
999 		if (!neigh_work) {
1000 			mlxsw_sp_port_dev_put(mlxsw_sp_port);
1001 			return NOTIFY_BAD;
1002 		}
1003 
1004 		INIT_WORK(&neigh_work->work, mlxsw_sp_router_neigh_event_work);
1005 		neigh_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
1006 		neigh_work->n = n;
1007 
1008 		/* Take a reference to ensure the neighbour won't be
1009 		 * destructed until we drop the reference in delayed
1010 		 * work.
1011 		 */
1012 		neigh_clone(n);
1013 		mlxsw_core_schedule_work(&neigh_work->work);
1014 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
1015 		break;
1016 	}
1017 
1018 	return NOTIFY_DONE;
1019 }
1020 
1021 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
1022 {
1023 	int err;
1024 
1025 	err = rhashtable_init(&mlxsw_sp->router.neigh_ht,
1026 			      &mlxsw_sp_neigh_ht_params);
1027 	if (err)
1028 		return err;
1029 
1030 	/* Initialize the polling interval according to the default
1031 	 * table.
1032 	 */
1033 	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
1034 
1035 	/* Create the delayed works for the activity_update */
1036 	INIT_DELAYED_WORK(&mlxsw_sp->router.neighs_update.dw,
1037 			  mlxsw_sp_router_neighs_update_work);
1038 	INIT_DELAYED_WORK(&mlxsw_sp->router.nexthop_probe_dw,
1039 			  mlxsw_sp_router_probe_unresolved_nexthops);
1040 	mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, 0);
1041 	mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, 0);
1042 	return 0;
1043 }
1044 
/* Undo mlxsw_sp_neigh_init(): stop both delayed works (waiting for a running
 * instance to finish) and only then destroy the neighbour hash table.
 */
static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
{
	cancel_delayed_work_sync(&mlxsw_sp->router.neighs_update.dw);
	cancel_delayed_work_sync(&mlxsw_sp->router.nexthop_probe_dw);
	rhashtable_destroy(&mlxsw_sp->router.neigh_ht);
}
1051 
1052 static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp,
1053 				    const struct mlxsw_sp_rif *r)
1054 {
1055 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
1056 
1057 	mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL,
1058 			     r->rif, r->addr);
1059 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
1060 }
1061 
1062 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
1063 					 struct mlxsw_sp_rif *r)
1064 {
1065 	struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
1066 
1067 	mlxsw_sp_neigh_rif_flush(mlxsw_sp, r);
1068 	list_for_each_entry_safe(neigh_entry, tmp, &r->neigh_list,
1069 				 rif_list_node)
1070 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
1071 }
1072 
/* Hash key of a nexthop: the address of the kernel's fib_nh it mirrors. */
struct mlxsw_sp_nexthop_key {
	struct fib_nh *fib_nh;
};
1076 
/* Driver-side state of a single nexthop; instances live in the nexthops[]
 * array at the end of struct mlxsw_sp_nexthop_group.
 */
struct mlxsw_sp_nexthop {
	struct list_head neigh_list_node; /* member of neigh entry list */
	struct list_head rif_list_node; /* member of the RIF's nexthop_list */
	struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
						* this belongs to
						*/
	struct rhash_head ht_node; /* linkage in router.nexthop_ht */
	struct mlxsw_sp_nexthop_key key; /* hash key (the fib_nh) */
	struct mlxsw_sp_rif *r; /* RIF bound to this nexthop, NULL if none */
	u8 should_offload:1, /* set indicates this neigh is connected and
			      * should be put to KVD linear area of this group.
			      */
	   offloaded:1, /* set in case the neigh is actually put into
			 * KVD linear area of this group.
			 */
	   update:1; /* set indicates that MAC of this neigh should be
		      * updated in HW
		      */
	struct mlxsw_sp_neigh_entry *neigh_entry; /* bound neighbour entry,
						   * NULL when not bound
						   */
};
1097 
/* Hash key of a nexthop group: the address of the kernel's fib_info. */
struct mlxsw_sp_nexthop_group_key {
	struct fib_info *fi;
};
1101 
1102 struct mlxsw_sp_nexthop_group {
1103 	struct rhash_head ht_node;
1104 	struct list_head fib_list; /* list of fib entries that use this group */
1105 	struct mlxsw_sp_nexthop_group_key key;
1106 	u8 adj_index_valid:1,
1107 	   gateway:1; /* routes using the group use a gateway */
1108 	u32 adj_index;
1109 	u16 ecmp_size;
1110 	u16 count;
1111 	struct mlxsw_sp_nexthop nexthops[0];
1112 #define nh_rif	nexthops[0].r
1113 };
1114 
1115 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
1116 	.key_offset = offsetof(struct mlxsw_sp_nexthop_group, key),
1117 	.head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
1118 	.key_len = sizeof(struct mlxsw_sp_nexthop_group_key),
1119 };
1120 
1121 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
1122 					 struct mlxsw_sp_nexthop_group *nh_grp)
1123 {
1124 	return rhashtable_insert_fast(&mlxsw_sp->router.nexthop_group_ht,
1125 				      &nh_grp->ht_node,
1126 				      mlxsw_sp_nexthop_group_ht_params);
1127 }
1128 
1129 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
1130 					  struct mlxsw_sp_nexthop_group *nh_grp)
1131 {
1132 	rhashtable_remove_fast(&mlxsw_sp->router.nexthop_group_ht,
1133 			       &nh_grp->ht_node,
1134 			       mlxsw_sp_nexthop_group_ht_params);
1135 }
1136 
1137 static struct mlxsw_sp_nexthop_group *
1138 mlxsw_sp_nexthop_group_lookup(struct mlxsw_sp *mlxsw_sp,
1139 			      struct mlxsw_sp_nexthop_group_key key)
1140 {
1141 	return rhashtable_lookup_fast(&mlxsw_sp->router.nexthop_group_ht, &key,
1142 				      mlxsw_sp_nexthop_group_ht_params);
1143 }
1144 
1145 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
1146 	.key_offset = offsetof(struct mlxsw_sp_nexthop, key),
1147 	.head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
1148 	.key_len = sizeof(struct mlxsw_sp_nexthop_key),
1149 };
1150 
1151 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
1152 				   struct mlxsw_sp_nexthop *nh)
1153 {
1154 	return rhashtable_insert_fast(&mlxsw_sp->router.nexthop_ht,
1155 				      &nh->ht_node, mlxsw_sp_nexthop_ht_params);
1156 }
1157 
1158 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
1159 				    struct mlxsw_sp_nexthop *nh)
1160 {
1161 	rhashtable_remove_fast(&mlxsw_sp->router.nexthop_ht, &nh->ht_node,
1162 			       mlxsw_sp_nexthop_ht_params);
1163 }
1164 
1165 static struct mlxsw_sp_nexthop *
1166 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
1167 			struct mlxsw_sp_nexthop_key key)
1168 {
1169 	return rhashtable_lookup_fast(&mlxsw_sp->router.nexthop_ht, &key,
1170 				      mlxsw_sp_nexthop_ht_params);
1171 }
1172 
1173 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
1174 					     struct mlxsw_sp_vr *vr,
1175 					     u32 adj_index, u16 ecmp_size,
1176 					     u32 new_adj_index,
1177 					     u16 new_ecmp_size)
1178 {
1179 	char raleu_pl[MLXSW_REG_RALEU_LEN];
1180 
1181 	mlxsw_reg_raleu_pack(raleu_pl,
1182 			     (enum mlxsw_reg_ralxx_protocol) vr->proto, vr->id,
1183 			     adj_index, ecmp_size, new_adj_index,
1184 			     new_ecmp_size);
1185 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
1186 }
1187 
1188 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
1189 					  struct mlxsw_sp_nexthop_group *nh_grp,
1190 					  u32 old_adj_index, u16 old_ecmp_size)
1191 {
1192 	struct mlxsw_sp_fib_entry *fib_entry;
1193 	struct mlxsw_sp_vr *vr = NULL;
1194 	int err;
1195 
1196 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
1197 		if (vr == fib_entry->fib_node->vr)
1198 			continue;
1199 		vr = fib_entry->fib_node->vr;
1200 		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr,
1201 							old_adj_index,
1202 							old_ecmp_size,
1203 							nh_grp->adj_index,
1204 							nh_grp->ecmp_size);
1205 		if (err)
1206 			return err;
1207 	}
1208 	return 0;
1209 }
1210 
1211 static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
1212 				       struct mlxsw_sp_nexthop *nh)
1213 {
1214 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
1215 	char ratr_pl[MLXSW_REG_RATR_LEN];
1216 
1217 	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
1218 			    true, adj_index, neigh_entry->rif);
1219 	mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
1220 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
1221 }
1222 
1223 static int
1224 mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp,
1225 				  struct mlxsw_sp_nexthop_group *nh_grp,
1226 				  bool reallocate)
1227 {
1228 	u32 adj_index = nh_grp->adj_index; /* base */
1229 	struct mlxsw_sp_nexthop *nh;
1230 	int i;
1231 	int err;
1232 
1233 	for (i = 0; i < nh_grp->count; i++) {
1234 		nh = &nh_grp->nexthops[i];
1235 
1236 		if (!nh->should_offload) {
1237 			nh->offloaded = 0;
1238 			continue;
1239 		}
1240 
1241 		if (nh->update || reallocate) {
1242 			err = mlxsw_sp_nexthop_mac_update(mlxsw_sp,
1243 							  adj_index, nh);
1244 			if (err)
1245 				return err;
1246 			nh->update = 0;
1247 			nh->offloaded = 1;
1248 		}
1249 		adj_index++;
1250 	}
1251 	return 0;
1252 }
1253 
1254 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1255 				     struct mlxsw_sp_fib_entry *fib_entry);
1256 
1257 static int
1258 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
1259 				    struct mlxsw_sp_nexthop_group *nh_grp)
1260 {
1261 	struct mlxsw_sp_fib_entry *fib_entry;
1262 	int err;
1263 
1264 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
1265 		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1266 		if (err)
1267 			return err;
1268 	}
1269 	return 0;
1270 }
1271 
/* Bring the device state of @nh_grp in line with the should_offload /
 * offloaded flags of its nexthops: update MACs in place when the set of
 * offloaded nexthops is unchanged, otherwise allocate a new adjacency block,
 * write it and move the group's FIB entries over to it. On failure, or when
 * no nexthop should be offloaded, the FIB entries are switched to trap (the
 * set_trap path). Nothing is returned; failures are only logged.
 */
static void
mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop *nh;
	bool offload_change = false;
	u32 adj_index;
	u16 ecmp_size = 0;
	bool old_adj_index_valid;
	u32 old_adj_index;
	u16 old_ecmp_size;
	int ret;
	int i;
	int err;

	/* For groups without a gateway only the FIB entries are rewritten;
	 * the result of that update is not checked.
	 */
	if (!nh_grp->gateway) {
		mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
		return;
	}

	/* Detect nexthops whose desired and actual offload state differ and
	 * count how many adjacency entries the group needs.
	 */
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];

		if (nh->should_offload ^ nh->offloaded) {
			offload_change = true;
			if (nh->should_offload)
				nh->update = 1;
		}
		if (nh->should_offload)
			ecmp_size++;
	}
	if (!offload_change) {
		/* Nothing was added or removed, so no need to reallocate. Just
		 * update MAC on existing adjacency indexes.
		 */
		err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp,
							false);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
			goto set_trap;
		}
		return;
	}
	if (!ecmp_size)
		/* No neigh of this group is connected so we just set
		 * the trap and let everything flow through kernel.
		 */
		goto set_trap;

	ret = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size);
	if (ret < 0) {
		/* We ran out of KVD linear space, just set the
		 * trap and let everything flow through kernel.
		 */
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
		goto set_trap;
	}
	/* Remember the previous adjacency parameters before the group is
	 * switched to the newly allocated block.
	 */
	adj_index = ret;
	old_adj_index_valid = nh_grp->adj_index_valid;
	old_adj_index = nh_grp->adj_index;
	old_ecmp_size = nh_grp->ecmp_size;
	nh_grp->adj_index_valid = 1;
	nh_grp->adj_index = adj_index;
	nh_grp->ecmp_size = ecmp_size;
	err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp, true);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
		/* NOTE(review): set_trap frees nh_grp->adj_index, which is the
		 * new block at this point. If old_adj_index_valid was set,
		 * old_adj_index does not appear to be freed on this path -
		 * confirm whether the old block is leaked.
		 */
		goto set_trap;
	}

	if (!old_adj_index_valid) {
		/* The trap was set for fib entries, so we have to call
		 * fib entry update to unset it and use adjacency index.
		 */
		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
			goto set_trap;
		}
		return;
	}

	/* The group already had a valid adjacency block: repoint its users
	 * from the old block to the new one, then release the old block
	 * whether or not the mass update succeeded.
	 */
	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
					     old_adj_index, old_ecmp_size);
	mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
		goto set_trap;
	}
	return;

set_trap:
	/* Invalidate the adjacency index, mark every nexthop as not offloaded
	 * and rewrite the FIB entries; the block referenced by
	 * nh_grp->adj_index is freed afterwards if it was valid on entry to
	 * this path.
	 */
	old_adj_index_valid = nh_grp->adj_index_valid;
	nh_grp->adj_index_valid = 0;
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		nh->offloaded = 0;
	}
	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
	if (err)
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
	if (old_adj_index_valid)
		mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
}
1376 
1377 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
1378 					    bool removing)
1379 {
1380 	if (!removing && !nh->should_offload)
1381 		nh->should_offload = 1;
1382 	else if (removing && nh->offloaded)
1383 		nh->should_offload = 0;
1384 	nh->update = 1;
1385 }
1386 
1387 static void
1388 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
1389 			      struct mlxsw_sp_neigh_entry *neigh_entry,
1390 			      bool removing)
1391 {
1392 	struct mlxsw_sp_nexthop *nh;
1393 
1394 	list_for_each_entry(nh, &neigh_entry->nexthop_list,
1395 			    neigh_list_node) {
1396 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
1397 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
1398 	}
1399 }
1400 
1401 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
1402 				      struct mlxsw_sp_rif *r)
1403 {
1404 	if (nh->r)
1405 		return;
1406 
1407 	nh->r = r;
1408 	list_add(&nh->rif_list_node, &r->nexthop_list);
1409 }
1410 
1411 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
1412 {
1413 	if (!nh->r)
1414 		return;
1415 
1416 	list_del(&nh->rif_list_node);
1417 	nh->r = NULL;
1418 }
1419 
1420 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
1421 				       struct mlxsw_sp_nexthop *nh)
1422 {
1423 	struct mlxsw_sp_neigh_entry *neigh_entry;
1424 	struct fib_nh *fib_nh = nh->key.fib_nh;
1425 	struct neighbour *n;
1426 	u8 nud_state, dead;
1427 	int err;
1428 
1429 	if (!nh->nh_grp->gateway || nh->neigh_entry)
1430 		return 0;
1431 
1432 	/* Take a reference of neigh here ensuring that neigh would
1433 	 * not be detructed before the nexthop entry is finished.
1434 	 * The reference is taken either in neigh_lookup() or
1435 	 * in neigh_create() in case n is not found.
1436 	 */
1437 	n = neigh_lookup(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev);
1438 	if (!n) {
1439 		n = neigh_create(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev);
1440 		if (IS_ERR(n))
1441 			return PTR_ERR(n);
1442 		neigh_event_send(n, NULL);
1443 	}
1444 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
1445 	if (!neigh_entry) {
1446 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
1447 		if (IS_ERR(neigh_entry)) {
1448 			err = -EINVAL;
1449 			goto err_neigh_entry_create;
1450 		}
1451 	}
1452 
1453 	/* If that is the first nexthop connected to that neigh, add to
1454 	 * nexthop_neighs_list
1455 	 */
1456 	if (list_empty(&neigh_entry->nexthop_list))
1457 		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
1458 			      &mlxsw_sp->router.nexthop_neighs_list);
1459 
1460 	nh->neigh_entry = neigh_entry;
1461 	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
1462 	read_lock_bh(&n->lock);
1463 	nud_state = n->nud_state;
1464 	dead = n->dead;
1465 	read_unlock_bh(&n->lock);
1466 	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
1467 
1468 	return 0;
1469 
1470 err_neigh_entry_create:
1471 	neigh_release(n);
1472 	return err;
1473 }
1474 
1475 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
1476 					struct mlxsw_sp_nexthop *nh)
1477 {
1478 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
1479 	struct neighbour *n;
1480 
1481 	if (!neigh_entry)
1482 		return;
1483 	n = neigh_entry->key.n;
1484 
1485 	__mlxsw_sp_nexthop_neigh_update(nh, true);
1486 	list_del(&nh->neigh_list_node);
1487 	nh->neigh_entry = NULL;
1488 
1489 	/* If that is the last nexthop connected to that neigh, remove from
1490 	 * nexthop_neighs_list
1491 	 */
1492 	if (list_empty(&neigh_entry->nexthop_list))
1493 		list_del(&neigh_entry->nexthop_neighs_list_node);
1494 
1495 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
1496 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
1497 
1498 	neigh_release(n);
1499 }
1500 
1501 static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
1502 				 struct mlxsw_sp_nexthop_group *nh_grp,
1503 				 struct mlxsw_sp_nexthop *nh,
1504 				 struct fib_nh *fib_nh)
1505 {
1506 	struct net_device *dev = fib_nh->nh_dev;
1507 	struct in_device *in_dev;
1508 	struct mlxsw_sp_rif *r;
1509 	int err;
1510 
1511 	nh->nh_grp = nh_grp;
1512 	nh->key.fib_nh = fib_nh;
1513 	err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
1514 	if (err)
1515 		return err;
1516 
1517 	in_dev = __in_dev_get_rtnl(dev);
1518 	if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
1519 	    fib_nh->nh_flags & RTNH_F_LINKDOWN)
1520 		return 0;
1521 
1522 	r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
1523 	if (!r)
1524 		return 0;
1525 	mlxsw_sp_nexthop_rif_init(nh, r);
1526 
1527 	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
1528 	if (err)
1529 		goto err_nexthop_neigh_init;
1530 
1531 	return 0;
1532 
1533 err_nexthop_neigh_init:
1534 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
1535 	return err;
1536 }
1537 
/* Tear down @nh: release its neighbour binding, unlink it from its RIF and
 * remove it from the nexthop hash table. The first two steps are no-ops when
 * the nexthop has no neighbour entry / RIF.
 */
static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
	mlxsw_sp_nexthop_rif_fini(nh);
	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
}
1545 
1546 static void mlxsw_sp_nexthop_event(struct mlxsw_sp *mlxsw_sp,
1547 				   unsigned long event, struct fib_nh *fib_nh)
1548 {
1549 	struct mlxsw_sp_nexthop_key key;
1550 	struct mlxsw_sp_nexthop *nh;
1551 	struct mlxsw_sp_rif *r;
1552 
1553 	if (mlxsw_sp->router.aborted)
1554 		return;
1555 
1556 	key.fib_nh = fib_nh;
1557 	nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
1558 	if (WARN_ON_ONCE(!nh))
1559 		return;
1560 
1561 	r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, fib_nh->nh_dev);
1562 	if (!r)
1563 		return;
1564 
1565 	switch (event) {
1566 	case FIB_EVENT_NH_ADD:
1567 		mlxsw_sp_nexthop_rif_init(nh, r);
1568 		mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
1569 		break;
1570 	case FIB_EVENT_NH_DEL:
1571 		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
1572 		mlxsw_sp_nexthop_rif_fini(nh);
1573 		break;
1574 	}
1575 
1576 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
1577 }
1578 
1579 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
1580 					   struct mlxsw_sp_rif *r)
1581 {
1582 	struct mlxsw_sp_nexthop *nh, *tmp;
1583 
1584 	list_for_each_entry_safe(nh, tmp, &r->nexthop_list, rif_list_node) {
1585 		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
1586 		mlxsw_sp_nexthop_rif_fini(nh);
1587 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
1588 	}
1589 }
1590 
1591 static struct mlxsw_sp_nexthop_group *
1592 mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
1593 {
1594 	struct mlxsw_sp_nexthop_group *nh_grp;
1595 	struct mlxsw_sp_nexthop *nh;
1596 	struct fib_nh *fib_nh;
1597 	size_t alloc_size;
1598 	int i;
1599 	int err;
1600 
1601 	alloc_size = sizeof(*nh_grp) +
1602 		     fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
1603 	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
1604 	if (!nh_grp)
1605 		return ERR_PTR(-ENOMEM);
1606 	INIT_LIST_HEAD(&nh_grp->fib_list);
1607 	nh_grp->gateway = fi->fib_nh->nh_scope == RT_SCOPE_LINK;
1608 	nh_grp->count = fi->fib_nhs;
1609 	nh_grp->key.fi = fi;
1610 	for (i = 0; i < nh_grp->count; i++) {
1611 		nh = &nh_grp->nexthops[i];
1612 		fib_nh = &fi->fib_nh[i];
1613 		err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh);
1614 		if (err)
1615 			goto err_nexthop_init;
1616 	}
1617 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
1618 	if (err)
1619 		goto err_nexthop_group_insert;
1620 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
1621 	return nh_grp;
1622 
1623 err_nexthop_group_insert:
1624 err_nexthop_init:
1625 	for (i--; i >= 0; i--) {
1626 		nh = &nh_grp->nexthops[i];
1627 		mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
1628 	}
1629 	kfree(nh_grp);
1630 	return ERR_PTR(err);
1631 }
1632 
1633 static void
1634 mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp,
1635 			       struct mlxsw_sp_nexthop_group *nh_grp)
1636 {
1637 	struct mlxsw_sp_nexthop *nh;
1638 	int i;
1639 
1640 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
1641 	for (i = 0; i < nh_grp->count; i++) {
1642 		nh = &nh_grp->nexthops[i];
1643 		mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
1644 	}
1645 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
1646 	WARN_ON_ONCE(nh_grp->adj_index_valid);
1647 	kfree(nh_grp);
1648 }
1649 
1650 static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp,
1651 				      struct mlxsw_sp_fib_entry *fib_entry,
1652 				      struct fib_info *fi)
1653 {
1654 	struct mlxsw_sp_nexthop_group_key key;
1655 	struct mlxsw_sp_nexthop_group *nh_grp;
1656 
1657 	key.fi = fi;
1658 	nh_grp = mlxsw_sp_nexthop_group_lookup(mlxsw_sp, key);
1659 	if (!nh_grp) {
1660 		nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi);
1661 		if (IS_ERR(nh_grp))
1662 			return PTR_ERR(nh_grp);
1663 	}
1664 	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
1665 	fib_entry->nh_group = nh_grp;
1666 	return 0;
1667 }
1668 
1669 static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp,
1670 				       struct mlxsw_sp_fib_entry *fib_entry)
1671 {
1672 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
1673 
1674 	list_del(&fib_entry->nexthop_group_node);
1675 	if (!list_empty(&nh_grp->fib_list))
1676 		return;
1677 	mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp);
1678 }
1679 
1680 static bool
1681 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
1682 {
1683 	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
1684 
1685 	if (fib_entry->params.tos)
1686 		return false;
1687 
1688 	switch (fib_entry->type) {
1689 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
1690 		return !!nh_group->adj_index_valid;
1691 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
1692 		return !!nh_group->nh_rif;
1693 	default:
1694 		return false;
1695 	}
1696 }
1697 
1698 static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
1699 {
1700 	fib_entry->offloaded = true;
1701 
1702 	switch (fib_entry->fib_node->vr->proto) {
1703 	case MLXSW_SP_L3_PROTO_IPV4:
1704 		fib_info_offload_inc(fib_entry->nh_group->key.fi);
1705 		break;
1706 	case MLXSW_SP_L3_PROTO_IPV6:
1707 		WARN_ON_ONCE(1);
1708 	}
1709 }
1710 
1711 static void
1712 mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
1713 {
1714 	switch (fib_entry->fib_node->vr->proto) {
1715 	case MLXSW_SP_L3_PROTO_IPV4:
1716 		fib_info_offload_dec(fib_entry->nh_group->key.fi);
1717 		break;
1718 	case MLXSW_SP_L3_PROTO_IPV6:
1719 		WARN_ON_ONCE(1);
1720 	}
1721 
1722 	fib_entry->offloaded = false;
1723 }
1724 
1725 static void
1726 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
1727 				   enum mlxsw_reg_ralue_op op, int err)
1728 {
1729 	switch (op) {
1730 	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
1731 		if (!fib_entry->offloaded)
1732 			return;
1733 		return mlxsw_sp_fib_entry_offload_unset(fib_entry);
1734 	case MLXSW_REG_RALUE_OP_WRITE_WRITE:
1735 		if (err)
1736 			return;
1737 		if (mlxsw_sp_fib_entry_should_offload(fib_entry) &&
1738 		    !fib_entry->offloaded)
1739 			mlxsw_sp_fib_entry_offload_set(fib_entry);
1740 		else if (!mlxsw_sp_fib_entry_should_offload(fib_entry) &&
1741 			 fib_entry->offloaded)
1742 			mlxsw_sp_fib_entry_offload_unset(fib_entry);
1743 		return;
1744 	default:
1745 		return;
1746 	}
1747 }
1748 
1749 static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp,
1750 					 struct mlxsw_sp_fib_entry *fib_entry,
1751 					 enum mlxsw_reg_ralue_op op)
1752 {
1753 	char ralue_pl[MLXSW_REG_RALUE_LEN];
1754 	u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
1755 	struct mlxsw_sp_vr *vr = fib_entry->fib_node->vr;
1756 	enum mlxsw_reg_ralue_trap_action trap_action;
1757 	u16 trap_id = 0;
1758 	u32 adjacency_index = 0;
1759 	u16 ecmp_size = 0;
1760 
1761 	/* In case the nexthop group adjacency index is valid, use it
1762 	 * with provided ECMP size. Otherwise, setup trap and pass
1763 	 * traffic to kernel.
1764 	 */
1765 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
1766 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
1767 		adjacency_index = fib_entry->nh_group->adj_index;
1768 		ecmp_size = fib_entry->nh_group->ecmp_size;
1769 	} else {
1770 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
1771 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
1772 	}
1773 
1774 	mlxsw_reg_ralue_pack4(ralue_pl,
1775 			      (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
1776 			      vr->id, fib_entry->fib_node->key.prefix_len,
1777 			      *p_dip);
1778 	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
1779 					adjacency_index, ecmp_size);
1780 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
1781 }
1782 
1783 static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp,
1784 					struct mlxsw_sp_fib_entry *fib_entry,
1785 					enum mlxsw_reg_ralue_op op)
1786 {
1787 	struct mlxsw_sp_rif *r = fib_entry->nh_group->nh_rif;
1788 	enum mlxsw_reg_ralue_trap_action trap_action;
1789 	char ralue_pl[MLXSW_REG_RALUE_LEN];
1790 	u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
1791 	struct mlxsw_sp_vr *vr = fib_entry->fib_node->vr;
1792 	u16 trap_id = 0;
1793 	u16 rif = 0;
1794 
1795 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
1796 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
1797 		rif = r->rif;
1798 	} else {
1799 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
1800 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
1801 	}
1802 
1803 	mlxsw_reg_ralue_pack4(ralue_pl,
1804 			      (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
1805 			      vr->id, fib_entry->fib_node->key.prefix_len,
1806 			      *p_dip);
1807 	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, rif);
1808 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
1809 }
1810 
1811 static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp,
1812 				       struct mlxsw_sp_fib_entry *fib_entry,
1813 				       enum mlxsw_reg_ralue_op op)
1814 {
1815 	char ralue_pl[MLXSW_REG_RALUE_LEN];
1816 	u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
1817 	struct mlxsw_sp_vr *vr = fib_entry->fib_node->vr;
1818 
1819 	mlxsw_reg_ralue_pack4(ralue_pl,
1820 			      (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
1821 			      vr->id, fib_entry->fib_node->key.prefix_len,
1822 			      *p_dip);
1823 	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
1824 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
1825 }
1826 
1827 static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp,
1828 				  struct mlxsw_sp_fib_entry *fib_entry,
1829 				  enum mlxsw_reg_ralue_op op)
1830 {
1831 	switch (fib_entry->type) {
1832 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
1833 		return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op);
1834 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
1835 		return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op);
1836 	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
1837 		return mlxsw_sp_fib_entry_op4_trap(mlxsw_sp, fib_entry, op);
1838 	}
1839 	return -EINVAL;
1840 }
1841 
1842 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
1843 				 struct mlxsw_sp_fib_entry *fib_entry,
1844 				 enum mlxsw_reg_ralue_op op)
1845 {
1846 	int err = -EINVAL;
1847 
1848 	switch (fib_entry->fib_node->vr->proto) {
1849 	case MLXSW_SP_L3_PROTO_IPV4:
1850 		err = mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op);
1851 		break;
1852 	case MLXSW_SP_L3_PROTO_IPV6:
1853 		return err;
1854 	}
1855 	mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
1856 	return err;
1857 }
1858 
1859 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1860 				     struct mlxsw_sp_fib_entry *fib_entry)
1861 {
1862 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
1863 				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
1864 }
1865 
1866 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
1867 				  struct mlxsw_sp_fib_entry *fib_entry)
1868 {
1869 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
1870 				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
1871 }
1872 
1873 static int
1874 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
1875 			     const struct fib_entry_notifier_info *fen_info,
1876 			     struct mlxsw_sp_fib_entry *fib_entry)
1877 {
1878 	struct fib_info *fi = fen_info->fi;
1879 
1880 	if (fen_info->type == RTN_LOCAL || fen_info->type == RTN_BROADCAST) {
1881 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1882 		return 0;
1883 	}
1884 	if (fen_info->type != RTN_UNICAST)
1885 		return -EINVAL;
1886 	if (fi->fib_nh->nh_scope != RT_SCOPE_LINK)
1887 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
1888 	else
1889 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
1890 	return 0;
1891 }
1892 
1893 static struct mlxsw_sp_fib_entry *
1894 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
1895 			   struct mlxsw_sp_fib_node *fib_node,
1896 			   const struct fib_entry_notifier_info *fen_info)
1897 {
1898 	struct mlxsw_sp_fib_entry *fib_entry;
1899 	int err;
1900 
1901 	fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL);
1902 	if (!fib_entry) {
1903 		err = -ENOMEM;
1904 		goto err_fib_entry_alloc;
1905 	}
1906 
1907 	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
1908 	if (err)
1909 		goto err_fib4_entry_type_set;
1910 
1911 	err = mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fen_info->fi);
1912 	if (err)
1913 		goto err_nexthop_group_get;
1914 
1915 	fib_entry->params.prio = fen_info->fi->fib_priority;
1916 	fib_entry->params.tb_id = fen_info->tb_id;
1917 	fib_entry->params.type = fen_info->type;
1918 	fib_entry->params.tos = fen_info->tos;
1919 
1920 	fib_entry->fib_node = fib_node;
1921 
1922 	return fib_entry;
1923 
1924 err_nexthop_group_get:
1925 err_fib4_entry_type_set:
1926 	kfree(fib_entry);
1927 err_fib_entry_alloc:
1928 	return ERR_PTR(err);
1929 }
1930 
/* Release @fib_entry: detach it from its nexthop group (which may destroy
 * the group if this was its last user) and free the entry.
 */
static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry)
{
	mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry);
	kfree(fib_entry);
}
1937 
1938 static struct mlxsw_sp_fib_node *
1939 mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp,
1940 		       const struct fib_entry_notifier_info *fen_info);
1941 
1942 static struct mlxsw_sp_fib_entry *
1943 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
1944 			   const struct fib_entry_notifier_info *fen_info)
1945 {
1946 	struct mlxsw_sp_fib_entry *fib_entry;
1947 	struct mlxsw_sp_fib_node *fib_node;
1948 
1949 	fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info);
1950 	if (IS_ERR(fib_node))
1951 		return NULL;
1952 
1953 	list_for_each_entry(fib_entry, &fib_node->entry_list, list) {
1954 		if (fib_entry->params.tb_id == fen_info->tb_id &&
1955 		    fib_entry->params.tos == fen_info->tos &&
1956 		    fib_entry->params.type == fen_info->type &&
1957 		    fib_entry->nh_group->key.fi == fen_info->fi) {
1958 			return fib_entry;
1959 		}
1960 	}
1961 
1962 	return NULL;
1963 }
1964 
1965 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
1966 	.key_offset = offsetof(struct mlxsw_sp_fib_node, key),
1967 	.head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
1968 	.key_len = sizeof(struct mlxsw_sp_fib_key),
1969 	.automatic_shrinking = true,
1970 };
1971 
1972 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
1973 				    struct mlxsw_sp_fib_node *fib_node)
1974 {
1975 	return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
1976 				      mlxsw_sp_fib_ht_params);
1977 }
1978 
1979 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
1980 				     struct mlxsw_sp_fib_node *fib_node)
1981 {
1982 	rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
1983 			       mlxsw_sp_fib_ht_params);
1984 }
1985 
1986 static struct mlxsw_sp_fib_node *
1987 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1988 			 size_t addr_len, unsigned char prefix_len)
1989 {
1990 	struct mlxsw_sp_fib_key key;
1991 
1992 	memset(&key, 0, sizeof(key));
1993 	memcpy(key.addr, addr, addr_len);
1994 	key.prefix_len = prefix_len;
1995 	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
1996 }
1997 
1998 static struct mlxsw_sp_fib_node *
1999 mlxsw_sp_fib_node_create(struct mlxsw_sp_vr *vr, const void *addr,
2000 			 size_t addr_len, unsigned char prefix_len)
2001 {
2002 	struct mlxsw_sp_fib_node *fib_node;
2003 
2004 	fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
2005 	if (!fib_node)
2006 		return NULL;
2007 
2008 	INIT_LIST_HEAD(&fib_node->entry_list);
2009 	list_add(&fib_node->list, &vr->fib->node_list);
2010 	memcpy(fib_node->key.addr, addr, addr_len);
2011 	fib_node->key.prefix_len = prefix_len;
2012 	mlxsw_sp_fib_node_insert(vr->fib, fib_node);
2013 	fib_node->vr = vr;
2014 
2015 	return fib_node;
2016 }
2017 
2018 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
2019 {
2020 	mlxsw_sp_fib_node_remove(fib_node->vr->fib, fib_node);
2021 	list_del(&fib_node->list);
2022 	WARN_ON(!list_empty(&fib_node->entry_list));
2023 	kfree(fib_node);
2024 }
2025 
2026 static bool
2027 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
2028 				 const struct mlxsw_sp_fib_entry *fib_entry)
2029 {
2030 	return list_first_entry(&fib_node->entry_list,
2031 				struct mlxsw_sp_fib_entry, list) == fib_entry;
2032 }
2033 
2034 static void mlxsw_sp_fib_node_prefix_inc(struct mlxsw_sp_fib_node *fib_node)
2035 {
2036 	unsigned char prefix_len = fib_node->key.prefix_len;
2037 	struct mlxsw_sp_fib *fib = fib_node->vr->fib;
2038 
2039 	if (fib->prefix_ref_count[prefix_len]++ == 0)
2040 		mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len);
2041 }
2042 
2043 static void mlxsw_sp_fib_node_prefix_dec(struct mlxsw_sp_fib_node *fib_node)
2044 {
2045 	unsigned char prefix_len = fib_node->key.prefix_len;
2046 	struct mlxsw_sp_fib *fib = fib_node->vr->fib;
2047 
2048 	if (--fib->prefix_ref_count[prefix_len] == 0)
2049 		mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len);
2050 }
2051 
2052 static struct mlxsw_sp_fib_node *
2053 mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp,
2054 		       const struct fib_entry_notifier_info *fen_info)
2055 {
2056 	struct mlxsw_sp_fib_node *fib_node;
2057 	struct mlxsw_sp_vr *vr;
2058 	int err;
2059 
2060 	vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->dst_len, fen_info->tb_id,
2061 			     MLXSW_SP_L3_PROTO_IPV4);
2062 	if (IS_ERR(vr))
2063 		return ERR_CAST(vr);
2064 
2065 	fib_node = mlxsw_sp_fib_node_lookup(vr->fib, &fen_info->dst,
2066 					    sizeof(fen_info->dst),
2067 					    fen_info->dst_len);
2068 	if (fib_node)
2069 		return fib_node;
2070 
2071 	fib_node = mlxsw_sp_fib_node_create(vr, &fen_info->dst,
2072 					    sizeof(fen_info->dst),
2073 					    fen_info->dst_len);
2074 	if (!fib_node) {
2075 		err = -ENOMEM;
2076 		goto err_fib_node_create;
2077 	}
2078 
2079 	return fib_node;
2080 
2081 err_fib_node_create:
2082 	mlxsw_sp_vr_put(mlxsw_sp, vr);
2083 	return ERR_PTR(err);
2084 }
2085 
2086 static void mlxsw_sp_fib4_node_put(struct mlxsw_sp *mlxsw_sp,
2087 				   struct mlxsw_sp_fib_node *fib_node)
2088 {
2089 	struct mlxsw_sp_vr *vr = fib_node->vr;
2090 
2091 	if (!list_empty(&fib_node->entry_list))
2092 		return;
2093 	mlxsw_sp_fib_node_destroy(fib_node);
2094 	mlxsw_sp_vr_put(mlxsw_sp, vr);
2095 }
2096 
2097 static struct mlxsw_sp_fib_entry *
2098 mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
2099 			      const struct mlxsw_sp_fib_entry_params *params)
2100 {
2101 	struct mlxsw_sp_fib_entry *fib_entry;
2102 
2103 	list_for_each_entry(fib_entry, &fib_node->entry_list, list) {
2104 		if (fib_entry->params.tb_id > params->tb_id)
2105 			continue;
2106 		if (fib_entry->params.tb_id != params->tb_id)
2107 			break;
2108 		if (fib_entry->params.tos > params->tos)
2109 			continue;
2110 		if (fib_entry->params.prio >= params->prio ||
2111 		    fib_entry->params.tos < params->tos)
2112 			return fib_entry;
2113 	}
2114 
2115 	return NULL;
2116 }
2117 
2118 static int mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib_entry *fib_entry,
2119 					  struct mlxsw_sp_fib_entry *new_entry)
2120 {
2121 	struct mlxsw_sp_fib_node *fib_node;
2122 
2123 	if (WARN_ON(!fib_entry))
2124 		return -EINVAL;
2125 
2126 	fib_node = fib_entry->fib_node;
2127 	list_for_each_entry_from(fib_entry, &fib_node->entry_list, list) {
2128 		if (fib_entry->params.tb_id != new_entry->params.tb_id ||
2129 		    fib_entry->params.tos != new_entry->params.tos ||
2130 		    fib_entry->params.prio != new_entry->params.prio)
2131 			break;
2132 	}
2133 
2134 	list_add_tail(&new_entry->list, &fib_entry->list);
2135 	return 0;
2136 }
2137 
2138 static int
2139 mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib_node *fib_node,
2140 			       struct mlxsw_sp_fib_entry *new_entry,
2141 			       bool replace, bool append)
2142 {
2143 	struct mlxsw_sp_fib_entry *fib_entry;
2144 
2145 	fib_entry = mlxsw_sp_fib4_node_entry_find(fib_node, &new_entry->params);
2146 
2147 	if (append)
2148 		return mlxsw_sp_fib4_node_list_append(fib_entry, new_entry);
2149 	if (replace && WARN_ON(!fib_entry))
2150 		return -EINVAL;
2151 
2152 	/* Insert new entry before replaced one, so that we can later
2153 	 * remove the second.
2154 	 */
2155 	if (fib_entry) {
2156 		list_add_tail(&new_entry->list, &fib_entry->list);
2157 	} else {
2158 		struct mlxsw_sp_fib_entry *last;
2159 
2160 		list_for_each_entry(last, &fib_node->entry_list, list) {
2161 			if (new_entry->params.tb_id > last->params.tb_id)
2162 				break;
2163 			fib_entry = last;
2164 		}
2165 
2166 		if (fib_entry)
2167 			list_add(&new_entry->list, &fib_entry->list);
2168 		else
2169 			list_add(&new_entry->list, &fib_node->entry_list);
2170 	}
2171 
2172 	return 0;
2173 }
2174 
/* Unlink @fib_entry from the entry list it is currently on. */
static void
mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib_entry *fib_entry)
{
	list_del(&fib_entry->list);
}
2180 
2181 static int
2182 mlxsw_sp_fib4_node_entry_add(struct mlxsw_sp *mlxsw_sp,
2183 			     const struct mlxsw_sp_fib_node *fib_node,
2184 			     struct mlxsw_sp_fib_entry *fib_entry)
2185 {
2186 	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
2187 		return 0;
2188 
2189 	/* To prevent packet loss, overwrite the previously offloaded
2190 	 * entry.
2191 	 */
2192 	if (!list_is_singular(&fib_node->entry_list)) {
2193 		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
2194 		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
2195 
2196 		mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
2197 	}
2198 
2199 	return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
2200 }
2201 
2202 static void
2203 mlxsw_sp_fib4_node_entry_del(struct mlxsw_sp *mlxsw_sp,
2204 			     const struct mlxsw_sp_fib_node *fib_node,
2205 			     struct mlxsw_sp_fib_entry *fib_entry)
2206 {
2207 	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
2208 		return;
2209 
2210 	/* Promote the next entry by overwriting the deleted entry */
2211 	if (!list_is_singular(&fib_node->entry_list)) {
2212 		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
2213 		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
2214 
2215 		mlxsw_sp_fib_entry_update(mlxsw_sp, n);
2216 		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
2217 		return;
2218 	}
2219 
2220 	mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
2221 }
2222 
2223 static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
2224 					 struct mlxsw_sp_fib_entry *fib_entry,
2225 					 bool replace, bool append)
2226 {
2227 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
2228 	int err;
2229 
2230 	err = mlxsw_sp_fib4_node_list_insert(fib_node, fib_entry, replace,
2231 					     append);
2232 	if (err)
2233 		return err;
2234 
2235 	err = mlxsw_sp_fib4_node_entry_add(mlxsw_sp, fib_node, fib_entry);
2236 	if (err)
2237 		goto err_fib4_node_entry_add;
2238 
2239 	mlxsw_sp_fib_node_prefix_inc(fib_node);
2240 
2241 	return 0;
2242 
2243 err_fib4_node_entry_add:
2244 	mlxsw_sp_fib4_node_list_remove(fib_entry);
2245 	return err;
2246 }
2247 
2248 static void
2249 mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
2250 				struct mlxsw_sp_fib_entry *fib_entry)
2251 {
2252 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
2253 
2254 	mlxsw_sp_fib_node_prefix_dec(fib_node);
2255 	mlxsw_sp_fib4_node_entry_del(mlxsw_sp, fib_node, fib_entry);
2256 	mlxsw_sp_fib4_node_list_remove(fib_entry);
2257 }
2258 
2259 static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
2260 					struct mlxsw_sp_fib_entry *fib_entry,
2261 					bool replace)
2262 {
2263 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
2264 	struct mlxsw_sp_fib_entry *replaced;
2265 
2266 	if (!replace)
2267 		return;
2268 
2269 	/* We inserted the new entry before replaced one */
2270 	replaced = list_next_entry(fib_entry, list);
2271 
2272 	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
2273 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
2274 	mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
2275 }
2276 
2277 static int
2278 mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
2279 			 const struct fib_entry_notifier_info *fen_info,
2280 			 bool replace, bool append)
2281 {
2282 	struct mlxsw_sp_fib_entry *fib_entry;
2283 	struct mlxsw_sp_fib_node *fib_node;
2284 	int err;
2285 
2286 	if (mlxsw_sp->router.aborted)
2287 		return 0;
2288 
2289 	fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info);
2290 	if (IS_ERR(fib_node)) {
2291 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
2292 		return PTR_ERR(fib_node);
2293 	}
2294 
2295 	fib_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
2296 	if (IS_ERR(fib_entry)) {
2297 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
2298 		err = PTR_ERR(fib_entry);
2299 		goto err_fib4_entry_create;
2300 	}
2301 
2302 	err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib_entry, replace,
2303 					    append);
2304 	if (err) {
2305 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
2306 		goto err_fib4_node_entry_link;
2307 	}
2308 
2309 	mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib_entry, replace);
2310 
2311 	return 0;
2312 
2313 err_fib4_node_entry_link:
2314 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
2315 err_fib4_entry_create:
2316 	mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
2317 	return err;
2318 }
2319 
2320 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
2321 				     struct fib_entry_notifier_info *fen_info)
2322 {
2323 	struct mlxsw_sp_fib_entry *fib_entry;
2324 	struct mlxsw_sp_fib_node *fib_node;
2325 
2326 	if (mlxsw_sp->router.aborted)
2327 		return;
2328 
2329 	fib_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
2330 	if (WARN_ON(!fib_entry))
2331 		return;
2332 	fib_node = fib_entry->fib_node;
2333 
2334 	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry);
2335 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
2336 	mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
2337 }
2338 
2339 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
2340 {
2341 	char ralta_pl[MLXSW_REG_RALTA_LEN];
2342 	char ralst_pl[MLXSW_REG_RALST_LEN];
2343 	char raltb_pl[MLXSW_REG_RALTB_LEN];
2344 	char ralue_pl[MLXSW_REG_RALUE_LEN];
2345 	int err;
2346 
2347 	mlxsw_reg_ralta_pack(ralta_pl, true, MLXSW_REG_RALXX_PROTOCOL_IPV4,
2348 			     MLXSW_SP_LPM_TREE_MIN);
2349 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
2350 	if (err)
2351 		return err;
2352 
2353 	mlxsw_reg_ralst_pack(ralst_pl, 0xff, MLXSW_SP_LPM_TREE_MIN);
2354 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
2355 	if (err)
2356 		return err;
2357 
2358 	mlxsw_reg_raltb_pack(raltb_pl, 0, MLXSW_REG_RALXX_PROTOCOL_IPV4,
2359 			     MLXSW_SP_LPM_TREE_MIN);
2360 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
2361 	if (err)
2362 		return err;
2363 
2364 	mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4,
2365 			      MLXSW_REG_RALUE_OP_WRITE_WRITE, 0, 0, 0);
2366 	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
2367 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
2368 }
2369 
2370 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
2371 				     struct mlxsw_sp_fib_node *fib_node)
2372 {
2373 	struct mlxsw_sp_fib_entry *fib_entry, *tmp;
2374 
2375 	list_for_each_entry_safe(fib_entry, tmp, &fib_node->entry_list, list) {
2376 		bool do_break = &tmp->list == &fib_node->entry_list;
2377 
2378 		mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry);
2379 		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
2380 		mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
2381 		/* Break when entry list is empty and node was freed.
2382 		 * Otherwise, we'll access freed memory in the next
2383 		 * iteration.
2384 		 */
2385 		if (do_break)
2386 			break;
2387 	}
2388 }
2389 
2390 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
2391 				    struct mlxsw_sp_fib_node *fib_node)
2392 {
2393 	switch (fib_node->vr->proto) {
2394 	case MLXSW_SP_L3_PROTO_IPV4:
2395 		mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
2396 		break;
2397 	case MLXSW_SP_L3_PROTO_IPV6:
2398 		WARN_ON_ONCE(1);
2399 		break;
2400 	}
2401 }
2402 
2403 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
2404 {
2405 	struct mlxsw_sp_fib_node *fib_node, *tmp;
2406 	struct mlxsw_sp_vr *vr;
2407 	int i;
2408 
2409 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
2410 		vr = &mlxsw_sp->router.vrs[i];
2411 
2412 		if (!vr->used)
2413 			continue;
2414 
2415 		list_for_each_entry_safe(fib_node, tmp, &vr->fib->node_list,
2416 					 list) {
2417 			bool do_break = &tmp->list == &vr->fib->node_list;
2418 
2419 			mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
2420 			if (do_break)
2421 				break;
2422 		}
2423 	}
2424 }
2425 
2426 static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp)
2427 {
2428 	int err;
2429 
2430 	if (mlxsw_sp->router.aborted)
2431 		return;
2432 	dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
2433 	mlxsw_sp_router_fib_flush(mlxsw_sp);
2434 	mlxsw_sp->router.aborted = true;
2435 	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
2436 	if (err)
2437 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
2438 }
2439 
2440 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
2441 {
2442 	char ritr_pl[MLXSW_REG_RITR_LEN];
2443 	int err;
2444 
2445 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
2446 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
2447 	if (WARN_ON_ONCE(err))
2448 		return err;
2449 
2450 	mlxsw_reg_ritr_enable_set(ritr_pl, false);
2451 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
2452 }
2453 
/* Synchronize router state with the disappearance of router interface
 * @r: disable the RIF in the device, then let the nexthop and neighbour
 * code react to the RIF being gone. The result of disabling the RIF is
 * not checked.
 */
void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_rif *r)
{
	mlxsw_sp_router_rif_disable(mlxsw_sp, r->rif);
	mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, r);
	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, r);
}
2461 
2462 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
2463 {
2464 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
2465 	u64 max_rifs;
2466 	int err;
2467 
2468 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
2469 		return -EIO;
2470 
2471 	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
2472 	mlxsw_sp->rifs = kcalloc(max_rifs, sizeof(struct mlxsw_sp_rif *),
2473 				 GFP_KERNEL);
2474 	if (!mlxsw_sp->rifs)
2475 		return -ENOMEM;
2476 
2477 	mlxsw_reg_rgcr_pack(rgcr_pl, true);
2478 	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
2479 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
2480 	if (err)
2481 		goto err_rgcr_fail;
2482 
2483 	return 0;
2484 
2485 err_rgcr_fail:
2486 	kfree(mlxsw_sp->rifs);
2487 	return err;
2488 }
2489 
2490 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
2491 {
2492 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
2493 	int i;
2494 
2495 	mlxsw_reg_rgcr_pack(rgcr_pl, false);
2496 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
2497 
2498 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
2499 		WARN_ON_ONCE(mlxsw_sp->rifs[i]);
2500 
2501 	kfree(mlxsw_sp->rifs);
2502 }
2503 
/* Deferred FIB notification: a copy of the notifier payload together
 * with the event code, queued from mlxsw_sp_router_fib_event() and
 * consumed by mlxsw_sp_router_fib_event_work().
 */
struct mlxsw_sp_fib_event_work {
	struct work_struct work;
	/* Which member is valid depends on @event */
	union {
		/* Copied for FIB_EVENT_ENTRY_* events */
		struct fib_entry_notifier_info fen_info;
		/* Copied for FIB_EVENT_NH_* events */
		struct fib_nh_notifier_info fnh_info;
	};
	struct mlxsw_sp *mlxsw_sp;
	/* FIB_EVENT_* code received by the notifier */
	unsigned long event;
};
2513 
2514 static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
2515 {
2516 	struct mlxsw_sp_fib_event_work *fib_work =
2517 		container_of(work, struct mlxsw_sp_fib_event_work, work);
2518 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
2519 	bool replace, append;
2520 	int err;
2521 
2522 	/* Protect internal structures from changes */
2523 	rtnl_lock();
2524 	switch (fib_work->event) {
2525 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
2526 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
2527 	case FIB_EVENT_ENTRY_ADD:
2528 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
2529 		append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
2530 		err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
2531 					       replace, append);
2532 		if (err)
2533 			mlxsw_sp_router_fib4_abort(mlxsw_sp);
2534 		fib_info_put(fib_work->fen_info.fi);
2535 		break;
2536 	case FIB_EVENT_ENTRY_DEL:
2537 		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
2538 		fib_info_put(fib_work->fen_info.fi);
2539 		break;
2540 	case FIB_EVENT_RULE_ADD: /* fall through */
2541 	case FIB_EVENT_RULE_DEL:
2542 		mlxsw_sp_router_fib4_abort(mlxsw_sp);
2543 		break;
2544 	case FIB_EVENT_NH_ADD: /* fall through */
2545 	case FIB_EVENT_NH_DEL:
2546 		mlxsw_sp_nexthop_event(mlxsw_sp, fib_work->event,
2547 				       fib_work->fnh_info.fib_nh);
2548 		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
2549 		break;
2550 	}
2551 	rtnl_unlock();
2552 	kfree(fib_work);
2553 }
2554 
2555 /* Called with rcu_read_lock() */
2556 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
2557 				     unsigned long event, void *ptr)
2558 {
2559 	struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
2560 	struct mlxsw_sp_fib_event_work *fib_work;
2561 	struct fib_notifier_info *info = ptr;
2562 
2563 	if (!net_eq(info->net, &init_net))
2564 		return NOTIFY_DONE;
2565 
2566 	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
2567 	if (WARN_ON(!fib_work))
2568 		return NOTIFY_BAD;
2569 
2570 	INIT_WORK(&fib_work->work, mlxsw_sp_router_fib_event_work);
2571 	fib_work->mlxsw_sp = mlxsw_sp;
2572 	fib_work->event = event;
2573 
2574 	switch (event) {
2575 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
2576 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
2577 	case FIB_EVENT_ENTRY_ADD: /* fall through */
2578 	case FIB_EVENT_ENTRY_DEL:
2579 		memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info));
2580 		/* Take referece on fib_info to prevent it from being
2581 		 * freed while work is queued. Release it afterwards.
2582 		 */
2583 		fib_info_hold(fib_work->fen_info.fi);
2584 		break;
2585 	case FIB_EVENT_NH_ADD: /* fall through */
2586 	case FIB_EVENT_NH_DEL:
2587 		memcpy(&fib_work->fnh_info, ptr, sizeof(fib_work->fnh_info));
2588 		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
2589 		break;
2590 	}
2591 
2592 	mlxsw_core_schedule_work(&fib_work->work);
2593 
2594 	return NOTIFY_DONE;
2595 }
2596 
2597 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
2598 {
2599 	struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
2600 
2601 	/* Flush pending FIB notifications and then flush the device's
2602 	 * table before requesting another dump. The FIB notification
2603 	 * block is unregistered, so no need to take RTNL.
2604 	 */
2605 	mlxsw_core_flush_owq();
2606 	mlxsw_sp_router_fib_flush(mlxsw_sp);
2607 }
2608 
2609 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
2610 {
2611 	int err;
2612 
2613 	INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list);
2614 	err = __mlxsw_sp_router_init(mlxsw_sp);
2615 	if (err)
2616 		return err;
2617 
2618 	err = rhashtable_init(&mlxsw_sp->router.nexthop_ht,
2619 			      &mlxsw_sp_nexthop_ht_params);
2620 	if (err)
2621 		goto err_nexthop_ht_init;
2622 
2623 	err = rhashtable_init(&mlxsw_sp->router.nexthop_group_ht,
2624 			      &mlxsw_sp_nexthop_group_ht_params);
2625 	if (err)
2626 		goto err_nexthop_group_ht_init;
2627 
2628 	mlxsw_sp_lpm_init(mlxsw_sp);
2629 	err = mlxsw_sp_vrs_init(mlxsw_sp);
2630 	if (err)
2631 		goto err_vrs_init;
2632 
2633 	err = mlxsw_sp_neigh_init(mlxsw_sp);
2634 	if (err)
2635 		goto err_neigh_init;
2636 
2637 	mlxsw_sp->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
2638 	err = register_fib_notifier(&mlxsw_sp->fib_nb,
2639 				    mlxsw_sp_router_fib_dump_flush);
2640 	if (err)
2641 		goto err_register_fib_notifier;
2642 
2643 	return 0;
2644 
2645 err_register_fib_notifier:
2646 	mlxsw_sp_neigh_fini(mlxsw_sp);
2647 err_neigh_init:
2648 	mlxsw_sp_vrs_fini(mlxsw_sp);
2649 err_vrs_init:
2650 	rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht);
2651 err_nexthop_group_ht_init:
2652 	rhashtable_destroy(&mlxsw_sp->router.nexthop_ht);
2653 err_nexthop_ht_init:
2654 	__mlxsw_sp_router_fini(mlxsw_sp);
2655 	return err;
2656 }
2657 
/* Tear down the router, undoing mlxsw_sp_router_init() in reverse
 * order: the FIB notifier is unregistered first, then neighbour
 * handling, virtual routers, the nexthop hash tables and the basic
 * router setup are released.
 */
void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	unregister_fib_notifier(&mlxsw_sp->fib_nb);
	mlxsw_sp_neigh_fini(mlxsw_sp);
	mlxsw_sp_vrs_fini(mlxsw_sp);
	rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht);
	rhashtable_destroy(&mlxsw_sp->router.nexthop_ht);
	__mlxsw_sp_router_fini(mlxsw_sp);
}
2667