1 /*
2  * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3  * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
4  * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
5  * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
6  * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the names of the copyright holders nor the names of its
17  *    contributors may be used to endorse or promote products derived from
18  *    this software without specific prior written permission.
19  *
20  * Alternatively, this software may be distributed under the terms of the
21  * GNU General Public License ("GPL") version 2 as published by the Free
22  * Software Foundation.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 
37 #include <linux/kernel.h>
38 #include <linux/types.h>
39 #include <linux/rhashtable.h>
40 #include <linux/bitops.h>
41 #include <linux/in6.h>
42 #include <linux/notifier.h>
43 #include <net/netevent.h>
44 #include <net/neighbour.h>
45 #include <net/arp.h>
46 
47 #include "spectrum.h"
48 #include "core.h"
49 #include "reg.h"
50 
/* Walk every bit that is set in the bitmap of @usage, storing the index of
 * the current bit in @bit. The walk is bounded by MLXSW_SP_PREFIX_COUNT.
 */
#define mlxsw_sp_prefix_usage_for_each(bit, usage) \
	for_each_set_bit(bit, (usage)->b, MLXSW_SP_PREFIX_COUNT)
53 
54 static bool
55 mlxsw_sp_prefix_usage_subset(struct mlxsw_sp_prefix_usage *prefix_usage1,
56 			     struct mlxsw_sp_prefix_usage *prefix_usage2)
57 {
58 	unsigned char prefix;
59 
60 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage1) {
61 		if (!test_bit(prefix, prefix_usage2->b))
62 			return false;
63 	}
64 	return true;
65 }
66 
67 static bool
68 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
69 			 struct mlxsw_sp_prefix_usage *prefix_usage2)
70 {
71 	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
72 }
73 
74 static bool
75 mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage)
76 {
77 	struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 } };
78 
79 	return mlxsw_sp_prefix_usage_eq(prefix_usage, &prefix_usage_none);
80 }
81 
82 static void
83 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
84 			  struct mlxsw_sp_prefix_usage *prefix_usage2)
85 {
86 	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
87 }
88 
89 static void
90 mlxsw_sp_prefix_usage_zero(struct mlxsw_sp_prefix_usage *prefix_usage)
91 {
92 	memset(prefix_usage, 0, sizeof(*prefix_usage));
93 }
94 
95 static void
96 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
97 			  unsigned char prefix_len)
98 {
99 	set_bit(prefix_len, prefix_usage->b);
100 }
101 
102 static void
103 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
104 			    unsigned char prefix_len)
105 {
106 	clear_bit(prefix_len, prefix_usage->b);
107 }
108 
109 struct mlxsw_sp_fib_key {
110 	unsigned char addr[sizeof(struct in6_addr)];
111 	unsigned char prefix_len;
112 };
113 
/* Kind of action associated with a FIB entry.
 * NOTE(review): the exact hardware semantics of each type are defined by
 * code outside this section - confirm there before relying on them.
 */
enum mlxsw_sp_fib_entry_type {
	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
};
119 
120 struct mlxsw_sp_nexthop_group;
121 
122 struct mlxsw_sp_fib_entry {
123 	struct rhash_head ht_node;
124 	struct mlxsw_sp_fib_key key;
125 	enum mlxsw_sp_fib_entry_type type;
126 	u8 added:1;
127 	u16 rif; /* used for action local */
128 	struct mlxsw_sp_vr *vr;
129 	struct list_head nexthop_group_node;
130 	struct mlxsw_sp_nexthop_group *nh_group;
131 };
132 
133 struct mlxsw_sp_fib {
134 	struct rhashtable ht;
135 	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
136 	struct mlxsw_sp_prefix_usage prefix_usage;
137 };
138 
139 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
140 	.key_offset = offsetof(struct mlxsw_sp_fib_entry, key),
141 	.head_offset = offsetof(struct mlxsw_sp_fib_entry, ht_node),
142 	.key_len = sizeof(struct mlxsw_sp_fib_key),
143 	.automatic_shrinking = true,
144 };
145 
146 static int mlxsw_sp_fib_entry_insert(struct mlxsw_sp_fib *fib,
147 				     struct mlxsw_sp_fib_entry *fib_entry)
148 {
149 	unsigned char prefix_len = fib_entry->key.prefix_len;
150 	int err;
151 
152 	err = rhashtable_insert_fast(&fib->ht, &fib_entry->ht_node,
153 				     mlxsw_sp_fib_ht_params);
154 	if (err)
155 		return err;
156 	if (fib->prefix_ref_count[prefix_len]++ == 0)
157 		mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len);
158 	return 0;
159 }
160 
161 static void mlxsw_sp_fib_entry_remove(struct mlxsw_sp_fib *fib,
162 				      struct mlxsw_sp_fib_entry *fib_entry)
163 {
164 	unsigned char prefix_len = fib_entry->key.prefix_len;
165 
166 	if (--fib->prefix_ref_count[prefix_len] == 0)
167 		mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len);
168 	rhashtable_remove_fast(&fib->ht, &fib_entry->ht_node,
169 			       mlxsw_sp_fib_ht_params);
170 }
171 
172 static struct mlxsw_sp_fib_entry *
173 mlxsw_sp_fib_entry_create(struct mlxsw_sp_fib *fib, const void *addr,
174 			  size_t addr_len, unsigned char prefix_len)
175 {
176 	struct mlxsw_sp_fib_entry *fib_entry;
177 
178 	fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL);
179 	if (!fib_entry)
180 		return NULL;
181 	memcpy(fib_entry->key.addr, addr, addr_len);
182 	fib_entry->key.prefix_len = prefix_len;
183 	return fib_entry;
184 }
185 
/* Free the memory of @fib_entry. The entry is not unlinked from any hash
 * table or list here.
 */
static void mlxsw_sp_fib_entry_destroy(struct mlxsw_sp_fib_entry *fib_entry)
{
	kfree(fib_entry);
}
190 
191 static struct mlxsw_sp_fib_entry *
192 mlxsw_sp_fib_entry_lookup(struct mlxsw_sp_fib *fib, const void *addr,
193 			  size_t addr_len, unsigned char prefix_len)
194 {
195 	struct mlxsw_sp_fib_key key = {{ 0 } };
196 
197 	memcpy(key.addr, addr, addr_len);
198 	key.prefix_len = prefix_len;
199 	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
200 }
201 
202 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(void)
203 {
204 	struct mlxsw_sp_fib *fib;
205 	int err;
206 
207 	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
208 	if (!fib)
209 		return ERR_PTR(-ENOMEM);
210 	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
211 	if (err)
212 		goto err_rhashtable_init;
213 	return fib;
214 
215 err_rhashtable_init:
216 	kfree(fib);
217 	return ERR_PTR(err);
218 }
219 
220 static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib)
221 {
222 	rhashtable_destroy(&fib->ht);
223 	kfree(fib);
224 }
225 
226 static struct mlxsw_sp_lpm_tree *
227 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp, bool one_reserved)
228 {
229 	static struct mlxsw_sp_lpm_tree *lpm_tree;
230 	int i;
231 
232 	for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
233 		lpm_tree = &mlxsw_sp->router.lpm_trees[i];
234 		if (lpm_tree->ref_count == 0) {
235 			if (one_reserved)
236 				one_reserved = false;
237 			else
238 				return lpm_tree;
239 		}
240 	}
241 	return NULL;
242 }
243 
244 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
245 				   struct mlxsw_sp_lpm_tree *lpm_tree)
246 {
247 	char ralta_pl[MLXSW_REG_RALTA_LEN];
248 
249 	mlxsw_reg_ralta_pack(ralta_pl, true, lpm_tree->proto, lpm_tree->id);
250 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
251 }
252 
253 static int mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
254 				  struct mlxsw_sp_lpm_tree *lpm_tree)
255 {
256 	char ralta_pl[MLXSW_REG_RALTA_LEN];
257 
258 	mlxsw_reg_ralta_pack(ralta_pl, false, lpm_tree->proto, lpm_tree->id);
259 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
260 }
261 
262 static int
263 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
264 				  struct mlxsw_sp_prefix_usage *prefix_usage,
265 				  struct mlxsw_sp_lpm_tree *lpm_tree)
266 {
267 	char ralst_pl[MLXSW_REG_RALST_LEN];
268 	u8 root_bin = 0;
269 	u8 prefix;
270 	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
271 
272 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
273 		root_bin = prefix;
274 
275 	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
276 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
277 		if (prefix == 0)
278 			continue;
279 		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
280 					 MLXSW_REG_RALST_BIN_NO_CHILD);
281 		last_prefix = prefix;
282 	}
283 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
284 }
285 
286 static struct mlxsw_sp_lpm_tree *
287 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
288 			 struct mlxsw_sp_prefix_usage *prefix_usage,
289 			 enum mlxsw_sp_l3proto proto, bool one_reserved)
290 {
291 	struct mlxsw_sp_lpm_tree *lpm_tree;
292 	int err;
293 
294 	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp, one_reserved);
295 	if (!lpm_tree)
296 		return ERR_PTR(-EBUSY);
297 	lpm_tree->proto = proto;
298 	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
299 	if (err)
300 		return ERR_PTR(err);
301 
302 	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
303 						lpm_tree);
304 	if (err)
305 		goto err_left_struct_set;
306 	return lpm_tree;
307 
308 err_left_struct_set:
309 	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
310 	return ERR_PTR(err);
311 }
312 
313 static int mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
314 				     struct mlxsw_sp_lpm_tree *lpm_tree)
315 {
316 	return mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
317 }
318 
319 static struct mlxsw_sp_lpm_tree *
320 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
321 		      struct mlxsw_sp_prefix_usage *prefix_usage,
322 		      enum mlxsw_sp_l3proto proto, bool one_reserved)
323 {
324 	struct mlxsw_sp_lpm_tree *lpm_tree;
325 	int i;
326 
327 	for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
328 		lpm_tree = &mlxsw_sp->router.lpm_trees[i];
329 		if (lpm_tree->proto == proto &&
330 		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
331 					     prefix_usage))
332 			goto inc_ref_count;
333 	}
334 	lpm_tree = mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage,
335 					    proto, one_reserved);
336 	if (IS_ERR(lpm_tree))
337 		return lpm_tree;
338 
339 inc_ref_count:
340 	lpm_tree->ref_count++;
341 	return lpm_tree;
342 }
343 
344 static int mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
345 				 struct mlxsw_sp_lpm_tree *lpm_tree)
346 {
347 	if (--lpm_tree->ref_count == 0)
348 		return mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
349 	return 0;
350 }
351 
352 static void mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
353 {
354 	struct mlxsw_sp_lpm_tree *lpm_tree;
355 	int i;
356 
357 	for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
358 		lpm_tree = &mlxsw_sp->router.lpm_trees[i];
359 		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
360 	}
361 }
362 
363 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
364 {
365 	struct mlxsw_sp_vr *vr;
366 	int i;
367 
368 	for (i = 0; i < MLXSW_SP_VIRTUAL_ROUTER_MAX; i++) {
369 		vr = &mlxsw_sp->router.vrs[i];
370 		if (!vr->used)
371 			return vr;
372 	}
373 	return NULL;
374 }
375 
376 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
377 				     struct mlxsw_sp_vr *vr)
378 {
379 	char raltb_pl[MLXSW_REG_RALTB_LEN];
380 
381 	mlxsw_reg_raltb_pack(raltb_pl, vr->id, vr->proto, vr->lpm_tree->id);
382 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
383 }
384 
385 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
386 				       struct mlxsw_sp_vr *vr)
387 {
388 	char raltb_pl[MLXSW_REG_RALTB_LEN];
389 
390 	/* Bind to tree 0 which is default */
391 	mlxsw_reg_raltb_pack(raltb_pl, vr->id, vr->proto, 0);
392 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
393 }
394 
395 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
396 {
397 	/* For our purpose, squash main and local table into one */
398 	if (tb_id == RT_TABLE_LOCAL)
399 		tb_id = RT_TABLE_MAIN;
400 	return tb_id;
401 }
402 
403 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
404 					    u32 tb_id,
405 					    enum mlxsw_sp_l3proto proto)
406 {
407 	struct mlxsw_sp_vr *vr;
408 	int i;
409 
410 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
411 	for (i = 0; i < MLXSW_SP_VIRTUAL_ROUTER_MAX; i++) {
412 		vr = &mlxsw_sp->router.vrs[i];
413 		if (vr->used && vr->proto == proto && vr->tb_id == tb_id)
414 			return vr;
415 	}
416 	return NULL;
417 }
418 
419 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
420 					      unsigned char prefix_len,
421 					      u32 tb_id,
422 					      enum mlxsw_sp_l3proto proto)
423 {
424 	struct mlxsw_sp_prefix_usage req_prefix_usage;
425 	struct mlxsw_sp_lpm_tree *lpm_tree;
426 	struct mlxsw_sp_vr *vr;
427 	int err;
428 
429 	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
430 	if (!vr)
431 		return ERR_PTR(-EBUSY);
432 	vr->fib = mlxsw_sp_fib_create();
433 	if (IS_ERR(vr->fib))
434 		return ERR_CAST(vr->fib);
435 
436 	vr->proto = proto;
437 	vr->tb_id = tb_id;
438 	mlxsw_sp_prefix_usage_zero(&req_prefix_usage);
439 	mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
440 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
441 					 proto, true);
442 	if (IS_ERR(lpm_tree)) {
443 		err = PTR_ERR(lpm_tree);
444 		goto err_tree_get;
445 	}
446 	vr->lpm_tree = lpm_tree;
447 	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
448 	if (err)
449 		goto err_tree_bind;
450 
451 	vr->used = true;
452 	return vr;
453 
454 err_tree_bind:
455 	mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
456 err_tree_get:
457 	mlxsw_sp_fib_destroy(vr->fib);
458 
459 	return ERR_PTR(err);
460 }
461 
462 static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
463 				struct mlxsw_sp_vr *vr)
464 {
465 	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr);
466 	mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
467 	mlxsw_sp_fib_destroy(vr->fib);
468 	vr->used = false;
469 }
470 
471 static int
472 mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
473 			   struct mlxsw_sp_prefix_usage *req_prefix_usage)
474 {
475 	struct mlxsw_sp_lpm_tree *lpm_tree;
476 
477 	if (mlxsw_sp_prefix_usage_eq(req_prefix_usage,
478 				     &vr->lpm_tree->prefix_usage))
479 		return 0;
480 
481 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage,
482 					 vr->proto, false);
483 	if (IS_ERR(lpm_tree)) {
484 		/* We failed to get a tree according to the required
485 		 * prefix usage. However, the current tree might be still good
486 		 * for us if our requirement is subset of the prefixes used
487 		 * in the tree.
488 		 */
489 		if (mlxsw_sp_prefix_usage_subset(req_prefix_usage,
490 						 &vr->lpm_tree->prefix_usage))
491 			return 0;
492 		return PTR_ERR(lpm_tree);
493 	}
494 
495 	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr);
496 	mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
497 	vr->lpm_tree = lpm_tree;
498 	return mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
499 }
500 
501 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp,
502 					   unsigned char prefix_len,
503 					   u32 tb_id,
504 					   enum mlxsw_sp_l3proto proto)
505 {
506 	struct mlxsw_sp_vr *vr;
507 	int err;
508 
509 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
510 	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id, proto);
511 	if (!vr) {
512 		vr = mlxsw_sp_vr_create(mlxsw_sp, prefix_len, tb_id, proto);
513 		if (IS_ERR(vr))
514 			return vr;
515 	} else {
516 		struct mlxsw_sp_prefix_usage req_prefix_usage;
517 
518 		mlxsw_sp_prefix_usage_cpy(&req_prefix_usage,
519 					  &vr->fib->prefix_usage);
520 		mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
521 		/* Need to replace LPM tree in case new prefix is required. */
522 		err = mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
523 						 &req_prefix_usage);
524 		if (err)
525 			return ERR_PTR(err);
526 	}
527 	return vr;
528 }
529 
530 static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
531 {
532 	/* Destroy virtual router entity in case the associated FIB is empty
533 	 * and allow it to be used for other tables in future. Otherwise,
534 	 * check if some prefix usage did not disappear and change tree if
535 	 * that is the case. Note that in case new, smaller tree cannot be
536 	 * allocated, the original one will be kept being used.
537 	 */
538 	if (mlxsw_sp_prefix_usage_none(&vr->fib->prefix_usage))
539 		mlxsw_sp_vr_destroy(mlxsw_sp, vr);
540 	else
541 		mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
542 					   &vr->fib->prefix_usage);
543 }
544 
545 static void mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
546 {
547 	struct mlxsw_sp_vr *vr;
548 	int i;
549 
550 	for (i = 0; i < MLXSW_SP_VIRTUAL_ROUTER_MAX; i++) {
551 		vr = &mlxsw_sp->router.vrs[i];
552 		vr->id = i;
553 	}
554 }
555 
556 struct mlxsw_sp_neigh_key {
557 	unsigned char addr[sizeof(struct in6_addr)];
558 	struct net_device *dev;
559 };
560 
561 struct mlxsw_sp_neigh_entry {
562 	struct rhash_head ht_node;
563 	struct mlxsw_sp_neigh_key key;
564 	u16 rif;
565 	struct neighbour *n;
566 	bool offloaded;
567 	struct delayed_work dw;
568 	struct mlxsw_sp_port *mlxsw_sp_port;
569 	unsigned char ha[ETH_ALEN];
570 	struct list_head nexthop_list; /* list of nexthops using
571 					* this neigh entry
572 					*/
573 	struct list_head nexthop_neighs_list_node;
574 };
575 
576 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
577 	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
578 	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
579 	.key_len = sizeof(struct mlxsw_sp_neigh_key),
580 };
581 
582 static int
583 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
584 			    struct mlxsw_sp_neigh_entry *neigh_entry)
585 {
586 	return rhashtable_insert_fast(&mlxsw_sp->router.neigh_ht,
587 				      &neigh_entry->ht_node,
588 				      mlxsw_sp_neigh_ht_params);
589 }
590 
591 static void
592 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
593 			    struct mlxsw_sp_neigh_entry *neigh_entry)
594 {
595 	rhashtable_remove_fast(&mlxsw_sp->router.neigh_ht,
596 			       &neigh_entry->ht_node,
597 			       mlxsw_sp_neigh_ht_params);
598 }
599 
600 static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work);
601 
602 static struct mlxsw_sp_neigh_entry *
603 mlxsw_sp_neigh_entry_create(const void *addr, size_t addr_len,
604 			    struct net_device *dev, u16 rif,
605 			    struct neighbour *n)
606 {
607 	struct mlxsw_sp_neigh_entry *neigh_entry;
608 
609 	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_ATOMIC);
610 	if (!neigh_entry)
611 		return NULL;
612 	memcpy(neigh_entry->key.addr, addr, addr_len);
613 	neigh_entry->key.dev = dev;
614 	neigh_entry->rif = rif;
615 	neigh_entry->n = n;
616 	INIT_DELAYED_WORK(&neigh_entry->dw, mlxsw_sp_router_neigh_update_hw);
617 	INIT_LIST_HEAD(&neigh_entry->nexthop_list);
618 	return neigh_entry;
619 }
620 
/* Free the memory of @neigh_entry. The entry is not unlinked from the hash
 * table or any list here.
 */
static void
mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	kfree(neigh_entry);
}
626 
627 static struct mlxsw_sp_neigh_entry *
628 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, const void *addr,
629 			    size_t addr_len, struct net_device *dev)
630 {
631 	struct mlxsw_sp_neigh_key key = {{ 0 } };
632 
633 	memcpy(key.addr, addr, addr_len);
634 	key.dev = dev;
635 	return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht,
636 				      &key, mlxsw_sp_neigh_ht_params);
637 }
638 
639 int mlxsw_sp_router_neigh_construct(struct net_device *dev,
640 				    struct neighbour *n)
641 {
642 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
643 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
644 	struct mlxsw_sp_neigh_entry *neigh_entry;
645 	struct mlxsw_sp_rif *r;
646 	u32 dip;
647 	int err;
648 
649 	if (n->tbl != &arp_tbl)
650 		return 0;
651 
652 	dip = ntohl(*((__be32 *) n->primary_key));
653 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &dip, sizeof(dip),
654 						  n->dev);
655 	if (neigh_entry) {
656 		WARN_ON(neigh_entry->n != n);
657 		return 0;
658 	}
659 
660 	r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
661 	if (WARN_ON(!r))
662 		return -EINVAL;
663 
664 	neigh_entry = mlxsw_sp_neigh_entry_create(&dip, sizeof(dip), n->dev,
665 						  r->rif, n);
666 	if (!neigh_entry)
667 		return -ENOMEM;
668 	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
669 	if (err)
670 		goto err_neigh_entry_insert;
671 	return 0;
672 
673 err_neigh_entry_insert:
674 	mlxsw_sp_neigh_entry_destroy(neigh_entry);
675 	return err;
676 }
677 
678 void mlxsw_sp_router_neigh_destroy(struct net_device *dev,
679 				   struct neighbour *n)
680 {
681 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
682 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
683 	struct mlxsw_sp_neigh_entry *neigh_entry;
684 	u32 dip;
685 
686 	if (n->tbl != &arp_tbl)
687 		return;
688 
689 	dip = ntohl(*((__be32 *) n->primary_key));
690 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &dip, sizeof(dip),
691 						  n->dev);
692 	if (!neigh_entry)
693 		return;
694 	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
695 	mlxsw_sp_neigh_entry_destroy(neigh_entry);
696 }
697 
698 static void
699 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
700 {
701 	unsigned long interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
702 
703 	mlxsw_sp->router.neighs_update.interval = jiffies_to_msecs(interval);
704 }
705 
706 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
707 						   char *rauhtd_pl,
708 						   int ent_index)
709 {
710 	struct net_device *dev;
711 	struct neighbour *n;
712 	__be32 dipn;
713 	u32 dip;
714 	u16 rif;
715 
716 	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
717 
718 	if (!mlxsw_sp->rifs[rif]) {
719 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
720 		return;
721 	}
722 
723 	dipn = htonl(dip);
724 	dev = mlxsw_sp->rifs[rif]->dev;
725 	n = neigh_lookup(&arp_tbl, &dipn, dev);
726 	if (!n) {
727 		netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n",
728 			   &dip);
729 		return;
730 	}
731 
732 	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
733 	neigh_event_send(n, NULL);
734 	neigh_release(n);
735 }
736 
737 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
738 						   char *rauhtd_pl,
739 						   int rec_index)
740 {
741 	u8 num_entries;
742 	int i;
743 
744 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
745 								rec_index);
746 	/* Hardware starts counting at 0, so add 1. */
747 	num_entries++;
748 
749 	/* Each record consists of several neighbour entries. */
750 	for (i = 0; i < num_entries; i++) {
751 		int ent_index;
752 
753 		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
754 		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
755 						       ent_index);
756 	}
757 
758 }
759 
760 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
761 					      char *rauhtd_pl, int rec_index)
762 {
763 	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
764 	case MLXSW_REG_RAUHTD_TYPE_IPV4:
765 		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
766 						       rec_index);
767 		break;
768 	case MLXSW_REG_RAUHTD_TYPE_IPV6:
769 		WARN_ON_ONCE(1);
770 		break;
771 	}
772 }
773 
774 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
775 {
776 	char *rauhtd_pl;
777 	u8 num_rec;
778 	int i, err;
779 
780 	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
781 	if (!rauhtd_pl)
782 		return -ENOMEM;
783 
784 	/* Make sure the neighbour's netdev isn't removed in the
785 	 * process.
786 	 */
787 	rtnl_lock();
788 	do {
789 		mlxsw_reg_rauhtd_pack(rauhtd_pl, MLXSW_REG_RAUHTD_TYPE_IPV4);
790 		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
791 				      rauhtd_pl);
792 		if (err) {
793 			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour talbe\n");
794 			break;
795 		}
796 		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
797 		for (i = 0; i < num_rec; i++)
798 			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
799 							  i);
800 	} while (num_rec);
801 	rtnl_unlock();
802 
803 	kfree(rauhtd_pl);
804 	return err;
805 }
806 
807 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
808 {
809 	struct mlxsw_sp_neigh_entry *neigh_entry;
810 
811 	/* Take RTNL mutex here to prevent lists from changes */
812 	rtnl_lock();
813 	list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
814 			    nexthop_neighs_list_node) {
815 		/* If this neigh have nexthops, make the kernel think this neigh
816 		 * is active regardless of the traffic.
817 		 */
818 		if (!list_empty(&neigh_entry->nexthop_list))
819 			neigh_event_send(neigh_entry->n, NULL);
820 	}
821 	rtnl_unlock();
822 }
823 
824 static void
825 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
826 {
827 	unsigned long interval = mlxsw_sp->router.neighs_update.interval;
828 
829 	mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw,
830 			       msecs_to_jiffies(interval));
831 }
832 
833 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
834 {
835 	struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
836 						 router.neighs_update.dw.work);
837 	int err;
838 
839 	err = mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp);
840 	if (err)
841 		dev_err(mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
842 
843 	mlxsw_sp_router_neighs_update_nh(mlxsw_sp);
844 
845 	mlxsw_sp_router_neighs_update_work_schedule(mlxsw_sp);
846 }
847 
848 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
849 {
850 	struct mlxsw_sp_neigh_entry *neigh_entry;
851 	struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
852 						 router.nexthop_probe_dw.work);
853 
854 	/* Iterate over nexthop neighbours, find those who are unresolved and
855 	 * send arp on them. This solves the chicken-egg problem when
856 	 * the nexthop wouldn't get offloaded until the neighbor is resolved
857 	 * but it wouldn't get resolved ever in case traffic is flowing in HW
858 	 * using different nexthop.
859 	 *
860 	 * Take RTNL mutex here to prevent lists from changes.
861 	 */
862 	rtnl_lock();
863 	list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
864 			    nexthop_neighs_list_node) {
865 		if (!(neigh_entry->n->nud_state & NUD_VALID) &&
866 		    !list_empty(&neigh_entry->nexthop_list))
867 			neigh_event_send(neigh_entry->n, NULL);
868 	}
869 	rtnl_unlock();
870 
871 	mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw,
872 			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
873 }
874 
875 static void
876 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
877 			      struct mlxsw_sp_neigh_entry *neigh_entry,
878 			      bool removing);
879 
880 static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work)
881 {
882 	struct mlxsw_sp_neigh_entry *neigh_entry =
883 		container_of(work, struct mlxsw_sp_neigh_entry, dw.work);
884 	struct neighbour *n = neigh_entry->n;
885 	struct mlxsw_sp_port *mlxsw_sp_port = neigh_entry->mlxsw_sp_port;
886 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
887 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
888 	struct net_device *dev;
889 	bool entry_connected;
890 	u8 nud_state;
891 	bool updating;
892 	bool removing;
893 	bool adding;
894 	u32 dip;
895 	int err;
896 
897 	read_lock_bh(&n->lock);
898 	dip = ntohl(*((__be32 *) n->primary_key));
899 	memcpy(neigh_entry->ha, n->ha, sizeof(neigh_entry->ha));
900 	nud_state = n->nud_state;
901 	dev = n->dev;
902 	read_unlock_bh(&n->lock);
903 
904 	entry_connected = nud_state & NUD_VALID;
905 	adding = (!neigh_entry->offloaded) && entry_connected;
906 	updating = neigh_entry->offloaded && entry_connected;
907 	removing = neigh_entry->offloaded && !entry_connected;
908 
909 	if (adding || updating) {
910 		mlxsw_reg_rauht_pack4(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_ADD,
911 				      neigh_entry->rif,
912 				      neigh_entry->ha, dip);
913 		err = mlxsw_reg_write(mlxsw_sp->core,
914 				      MLXSW_REG(rauht), rauht_pl);
915 		if (err) {
916 			netdev_err(dev, "Could not add neigh %pI4h\n", &dip);
917 			neigh_entry->offloaded = false;
918 		} else {
919 			neigh_entry->offloaded = true;
920 		}
921 		mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, false);
922 	} else if (removing) {
923 		mlxsw_reg_rauht_pack4(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE,
924 				      neigh_entry->rif,
925 				      neigh_entry->ha, dip);
926 		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht),
927 				      rauht_pl);
928 		if (err) {
929 			netdev_err(dev, "Could not delete neigh %pI4h\n", &dip);
930 			neigh_entry->offloaded = true;
931 		} else {
932 			neigh_entry->offloaded = false;
933 		}
934 		mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, true);
935 	}
936 
937 	neigh_release(n);
938 	mlxsw_sp_port_dev_put(mlxsw_sp_port);
939 }
940 
941 static int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
942 					  unsigned long event, void *ptr)
943 {
944 	struct mlxsw_sp_neigh_entry *neigh_entry;
945 	struct mlxsw_sp_port *mlxsw_sp_port;
946 	struct mlxsw_sp *mlxsw_sp;
947 	unsigned long interval;
948 	struct net_device *dev;
949 	struct neigh_parms *p;
950 	struct neighbour *n;
951 	u32 dip;
952 
953 	switch (event) {
954 	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
955 		p = ptr;
956 
957 		/* We don't care about changes in the default table. */
958 		if (!p->dev || p->tbl != &arp_tbl)
959 			return NOTIFY_DONE;
960 
961 		/* We are in atomic context and can't take RTNL mutex,
962 		 * so use RCU variant to walk the device chain.
963 		 */
964 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
965 		if (!mlxsw_sp_port)
966 			return NOTIFY_DONE;
967 
968 		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
969 		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
970 		mlxsw_sp->router.neighs_update.interval = interval;
971 
972 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
973 		break;
974 	case NETEVENT_NEIGH_UPDATE:
975 		n = ptr;
976 		dev = n->dev;
977 
978 		if (n->tbl != &arp_tbl)
979 			return NOTIFY_DONE;
980 
981 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(dev);
982 		if (!mlxsw_sp_port)
983 			return NOTIFY_DONE;
984 
985 		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
986 		dip = ntohl(*((__be32 *) n->primary_key));
987 		neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp,
988 							  &dip,
989 							  sizeof(__be32),
990 							  dev);
991 		if (WARN_ON(!neigh_entry) || WARN_ON(neigh_entry->n != n)) {
992 			mlxsw_sp_port_dev_put(mlxsw_sp_port);
993 			return NOTIFY_DONE;
994 		}
995 		neigh_entry->mlxsw_sp_port = mlxsw_sp_port;
996 
997 		/* Take a reference to ensure the neighbour won't be
998 		 * destructed until we drop the reference in delayed
999 		 * work.
1000 		 */
1001 		neigh_clone(n);
1002 		if (!mlxsw_core_schedule_dw(&neigh_entry->dw, 0)) {
1003 			neigh_release(n);
1004 			mlxsw_sp_port_dev_put(mlxsw_sp_port);
1005 		}
1006 		break;
1007 	}
1008 
1009 	return NOTIFY_DONE;
1010 }
1011 
1012 static struct notifier_block mlxsw_sp_router_netevent_nb __read_mostly = {
1013 	.notifier_call = mlxsw_sp_router_netevent_event,
1014 };
1015 
1016 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
1017 {
1018 	int err;
1019 
1020 	err = rhashtable_init(&mlxsw_sp->router.neigh_ht,
1021 			      &mlxsw_sp_neigh_ht_params);
1022 	if (err)
1023 		return err;
1024 
1025 	/* Initialize the polling interval according to the default
1026 	 * table.
1027 	 */
1028 	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
1029 
1030 	err = register_netevent_notifier(&mlxsw_sp_router_netevent_nb);
1031 	if (err)
1032 		goto err_register_netevent_notifier;
1033 
1034 	/* Create the delayed works for the activity_update */
1035 	INIT_DELAYED_WORK(&mlxsw_sp->router.neighs_update.dw,
1036 			  mlxsw_sp_router_neighs_update_work);
1037 	INIT_DELAYED_WORK(&mlxsw_sp->router.nexthop_probe_dw,
1038 			  mlxsw_sp_router_probe_unresolved_nexthops);
1039 	mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, 0);
1040 	mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, 0);
1041 	return 0;
1042 
1043 err_register_netevent_notifier:
1044 	rhashtable_destroy(&mlxsw_sp->router.neigh_ht);
1045 	return err;
1046 }
1047 
1048 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
1049 {
1050 	cancel_delayed_work_sync(&mlxsw_sp->router.neighs_update.dw);
1051 	cancel_delayed_work_sync(&mlxsw_sp->router.nexthop_probe_dw);
1052 	unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb);
1053 	rhashtable_destroy(&mlxsw_sp->router.neigh_ht);
1054 }
1055 
1056 struct mlxsw_sp_nexthop {
1057 	struct list_head neigh_list_node; /* member of neigh entry list */
1058 	struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
1059 						* this belongs to
1060 						*/
1061 	u8 should_offload:1, /* set indicates this neigh is connected and
1062 			      * should be put to KVD linear area of this group.
1063 			      */
1064 	   offloaded:1, /* set in case the neigh is actually put into
1065 			 * KVD linear area of this group.
1066 			 */
1067 	   update:1; /* set indicates that MAC of this neigh should be
1068 		      * updated in HW
1069 		      */
1070 	struct mlxsw_sp_neigh_entry *neigh_entry;
1071 };
1072 
1073 struct mlxsw_sp_nexthop_group {
1074 	struct list_head list; /* node in mlxsw->router.nexthop_group_list */
1075 	struct list_head fib_list; /* list of fib entries that use this group */
1076 	u8 adj_index_valid:1;
1077 	u32 adj_index;
1078 	u16 ecmp_size;
1079 	u16 count;
1080 	struct mlxsw_sp_nexthop nexthops[0];
1081 };
1082 
1083 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
1084 					     struct mlxsw_sp_vr *vr,
1085 					     u32 adj_index, u16 ecmp_size,
1086 					     u32 new_adj_index,
1087 					     u16 new_ecmp_size)
1088 {
1089 	char raleu_pl[MLXSW_REG_RALEU_LEN];
1090 
1091 	mlxsw_reg_raleu_pack(raleu_pl, vr->proto, vr->id,
1092 			     adj_index, ecmp_size,
1093 			     new_adj_index, new_ecmp_size);
1094 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
1095 }
1096 
1097 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
1098 					  struct mlxsw_sp_nexthop_group *nh_grp,
1099 					  u32 old_adj_index, u16 old_ecmp_size)
1100 {
1101 	struct mlxsw_sp_fib_entry *fib_entry;
1102 	struct mlxsw_sp_vr *vr = NULL;
1103 	int err;
1104 
1105 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
1106 		if (vr == fib_entry->vr)
1107 			continue;
1108 		vr = fib_entry->vr;
1109 		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr,
1110 							old_adj_index,
1111 							old_ecmp_size,
1112 							nh_grp->adj_index,
1113 							nh_grp->ecmp_size);
1114 		if (err)
1115 			return err;
1116 	}
1117 	return 0;
1118 }
1119 
1120 static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
1121 				       struct mlxsw_sp_nexthop *nh)
1122 {
1123 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
1124 	char ratr_pl[MLXSW_REG_RATR_LEN];
1125 
1126 	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
1127 			    true, adj_index, neigh_entry->rif);
1128 	mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
1129 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
1130 }
1131 
1132 static int
1133 mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp,
1134 				  struct mlxsw_sp_nexthop_group *nh_grp)
1135 {
1136 	u32 adj_index = nh_grp->adj_index; /* base */
1137 	struct mlxsw_sp_nexthop *nh;
1138 	int i;
1139 	int err;
1140 
1141 	for (i = 0; i < nh_grp->count; i++) {
1142 		nh = &nh_grp->nexthops[i];
1143 
1144 		if (!nh->should_offload) {
1145 			nh->offloaded = 0;
1146 			continue;
1147 		}
1148 
1149 		if (nh->update) {
1150 			err = mlxsw_sp_nexthop_mac_update(mlxsw_sp,
1151 							  adj_index, nh);
1152 			if (err)
1153 				return err;
1154 			nh->update = 0;
1155 			nh->offloaded = 1;
1156 		}
1157 		adj_index++;
1158 	}
1159 	return 0;
1160 }
1161 
1162 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1163 				     struct mlxsw_sp_fib_entry *fib_entry);
1164 
1165 static int
1166 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
1167 				    struct mlxsw_sp_nexthop_group *nh_grp)
1168 {
1169 	struct mlxsw_sp_fib_entry *fib_entry;
1170 	int err;
1171 
1172 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
1173 		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1174 		if (err)
1175 			return err;
1176 	}
1177 	return 0;
1178 }
1179 
1180 static void
1181 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
1182 			       struct mlxsw_sp_nexthop_group *nh_grp)
1183 {
1184 	struct mlxsw_sp_nexthop *nh;
1185 	bool offload_change = false;
1186 	u32 adj_index;
1187 	u16 ecmp_size = 0;
1188 	bool old_adj_index_valid;
1189 	u32 old_adj_index;
1190 	u16 old_ecmp_size;
1191 	int ret;
1192 	int i;
1193 	int err;
1194 
1195 	for (i = 0; i < nh_grp->count; i++) {
1196 		nh = &nh_grp->nexthops[i];
1197 
1198 		if (nh->should_offload ^ nh->offloaded) {
1199 			offload_change = true;
1200 			if (nh->should_offload)
1201 				nh->update = 1;
1202 		}
1203 		if (nh->should_offload)
1204 			ecmp_size++;
1205 	}
1206 	if (!offload_change) {
1207 		/* Nothing was added or removed, so no need to reallocate. Just
1208 		 * update MAC on existing adjacency indexes.
1209 		 */
1210 		err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp);
1211 		if (err) {
1212 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
1213 			goto set_trap;
1214 		}
1215 		return;
1216 	}
1217 	if (!ecmp_size)
1218 		/* No neigh of this group is connected so we just set
1219 		 * the trap and let everthing flow through kernel.
1220 		 */
1221 		goto set_trap;
1222 
1223 	ret = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size);
1224 	if (ret < 0) {
1225 		/* We ran out of KVD linear space, just set the
1226 		 * trap and let everything flow through kernel.
1227 		 */
1228 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
1229 		goto set_trap;
1230 	}
1231 	adj_index = ret;
1232 	old_adj_index_valid = nh_grp->adj_index_valid;
1233 	old_adj_index = nh_grp->adj_index;
1234 	old_ecmp_size = nh_grp->ecmp_size;
1235 	nh_grp->adj_index_valid = 1;
1236 	nh_grp->adj_index = adj_index;
1237 	nh_grp->ecmp_size = ecmp_size;
1238 	err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp);
1239 	if (err) {
1240 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
1241 		goto set_trap;
1242 	}
1243 
1244 	if (!old_adj_index_valid) {
1245 		/* The trap was set for fib entries, so we have to call
1246 		 * fib entry update to unset it and use adjacency index.
1247 		 */
1248 		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
1249 		if (err) {
1250 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
1251 			goto set_trap;
1252 		}
1253 		return;
1254 	}
1255 
1256 	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
1257 					     old_adj_index, old_ecmp_size);
1258 	mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
1259 	if (err) {
1260 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
1261 		goto set_trap;
1262 	}
1263 	return;
1264 
1265 set_trap:
1266 	old_adj_index_valid = nh_grp->adj_index_valid;
1267 	nh_grp->adj_index_valid = 0;
1268 	for (i = 0; i < nh_grp->count; i++) {
1269 		nh = &nh_grp->nexthops[i];
1270 		nh->offloaded = 0;
1271 	}
1272 	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
1273 	if (err)
1274 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
1275 	if (old_adj_index_valid)
1276 		mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
1277 }
1278 
1279 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
1280 					    bool removing)
1281 {
1282 	if (!removing && !nh->should_offload)
1283 		nh->should_offload = 1;
1284 	else if (removing && nh->offloaded)
1285 		nh->should_offload = 0;
1286 	nh->update = 1;
1287 }
1288 
1289 static void
1290 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
1291 			      struct mlxsw_sp_neigh_entry *neigh_entry,
1292 			      bool removing)
1293 {
1294 	struct mlxsw_sp_nexthop *nh;
1295 
1296 	/* Take RTNL mutex here to prevent lists from changes */
1297 	rtnl_lock();
1298 	list_for_each_entry(nh, &neigh_entry->nexthop_list,
1299 			    neigh_list_node) {
1300 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
1301 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
1302 	}
1303 	rtnl_unlock();
1304 }
1305 
1306 static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
1307 				 struct mlxsw_sp_nexthop_group *nh_grp,
1308 				 struct mlxsw_sp_nexthop *nh,
1309 				 struct fib_nh *fib_nh)
1310 {
1311 	struct mlxsw_sp_neigh_entry *neigh_entry;
1312 	u32 gwip = ntohl(fib_nh->nh_gw);
1313 	struct net_device *dev = fib_nh->nh_dev;
1314 	struct neighbour *n;
1315 	u8 nud_state;
1316 
1317 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &gwip,
1318 						  sizeof(gwip), dev);
1319 	if (!neigh_entry) {
1320 		__be32 gwipn = htonl(gwip);
1321 
1322 		n = neigh_create(&arp_tbl, &gwipn, dev);
1323 		if (IS_ERR(n))
1324 			return PTR_ERR(n);
1325 		neigh_event_send(n, NULL);
1326 		neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &gwip,
1327 							  sizeof(gwip), dev);
1328 		if (!neigh_entry) {
1329 			neigh_release(n);
1330 			return -EINVAL;
1331 		}
1332 	} else {
1333 		/* Take a reference of neigh here ensuring that neigh would
1334 		 * not be detructed before the nexthop entry is finished.
1335 		 * The second branch takes the reference in neith_create()
1336 		 */
1337 		n = neigh_entry->n;
1338 		neigh_clone(n);
1339 	}
1340 
1341 	/* If that is the first nexthop connected to that neigh, add to
1342 	 * nexthop_neighs_list
1343 	 */
1344 	if (list_empty(&neigh_entry->nexthop_list))
1345 		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
1346 			      &mlxsw_sp->router.nexthop_neighs_list);
1347 
1348 	nh->nh_grp = nh_grp;
1349 	nh->neigh_entry = neigh_entry;
1350 	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
1351 	read_lock_bh(&n->lock);
1352 	nud_state = n->nud_state;
1353 	read_unlock_bh(&n->lock);
1354 	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID));
1355 
1356 	return 0;
1357 }
1358 
1359 static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp,
1360 				  struct mlxsw_sp_nexthop *nh)
1361 {
1362 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
1363 
1364 	list_del(&nh->neigh_list_node);
1365 
1366 	/* If that is the last nexthop connected to that neigh, remove from
1367 	 * nexthop_neighs_list
1368 	 */
1369 	if (list_empty(&nh->neigh_entry->nexthop_list))
1370 		list_del(&nh->neigh_entry->nexthop_neighs_list_node);
1371 
1372 	neigh_release(neigh_entry->n);
1373 }
1374 
1375 static struct mlxsw_sp_nexthop_group *
1376 mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
1377 {
1378 	struct mlxsw_sp_nexthop_group *nh_grp;
1379 	struct mlxsw_sp_nexthop *nh;
1380 	struct fib_nh *fib_nh;
1381 	size_t alloc_size;
1382 	int i;
1383 	int err;
1384 
1385 	alloc_size = sizeof(*nh_grp) +
1386 		     fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
1387 	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
1388 	if (!nh_grp)
1389 		return ERR_PTR(-ENOMEM);
1390 	INIT_LIST_HEAD(&nh_grp->fib_list);
1391 	nh_grp->count = fi->fib_nhs;
1392 	for (i = 0; i < nh_grp->count; i++) {
1393 		nh = &nh_grp->nexthops[i];
1394 		fib_nh = &fi->fib_nh[i];
1395 		err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh);
1396 		if (err)
1397 			goto err_nexthop_init;
1398 	}
1399 	list_add_tail(&nh_grp->list, &mlxsw_sp->router.nexthop_group_list);
1400 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
1401 	return nh_grp;
1402 
1403 err_nexthop_init:
1404 	for (i--; i >= 0; i--)
1405 		mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
1406 	kfree(nh_grp);
1407 	return ERR_PTR(err);
1408 }
1409 
1410 static void
1411 mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp,
1412 			       struct mlxsw_sp_nexthop_group *nh_grp)
1413 {
1414 	struct mlxsw_sp_nexthop *nh;
1415 	int i;
1416 
1417 	list_del(&nh_grp->list);
1418 	for (i = 0; i < nh_grp->count; i++) {
1419 		nh = &nh_grp->nexthops[i];
1420 		mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
1421 	}
1422 	kfree(nh_grp);
1423 }
1424 
1425 static bool mlxsw_sp_nexthop_match(struct mlxsw_sp_nexthop *nh,
1426 				   struct fib_info *fi)
1427 {
1428 	int i;
1429 
1430 	for (i = 0; i < fi->fib_nhs; i++) {
1431 		struct fib_nh *fib_nh = &fi->fib_nh[i];
1432 		u32 gwip = ntohl(fib_nh->nh_gw);
1433 
1434 		if (memcmp(nh->neigh_entry->key.addr,
1435 			   &gwip, sizeof(u32)) == 0 &&
1436 		    nh->neigh_entry->key.dev == fib_nh->nh_dev)
1437 			return true;
1438 	}
1439 	return false;
1440 }
1441 
1442 static bool mlxsw_sp_nexthop_group_match(struct mlxsw_sp_nexthop_group *nh_grp,
1443 					 struct fib_info *fi)
1444 {
1445 	int i;
1446 
1447 	if (nh_grp->count != fi->fib_nhs)
1448 		return false;
1449 	for (i = 0; i < nh_grp->count; i++) {
1450 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
1451 
1452 		if (!mlxsw_sp_nexthop_match(nh, fi))
1453 			return false;
1454 	}
1455 	return true;
1456 }
1457 
1458 static struct mlxsw_sp_nexthop_group *
1459 mlxsw_sp_nexthop_group_find(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
1460 {
1461 	struct mlxsw_sp_nexthop_group *nh_grp;
1462 
1463 	list_for_each_entry(nh_grp, &mlxsw_sp->router.nexthop_group_list,
1464 			    list) {
1465 		if (mlxsw_sp_nexthop_group_match(nh_grp, fi))
1466 			return nh_grp;
1467 	}
1468 	return NULL;
1469 }
1470 
1471 static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp,
1472 				      struct mlxsw_sp_fib_entry *fib_entry,
1473 				      struct fib_info *fi)
1474 {
1475 	struct mlxsw_sp_nexthop_group *nh_grp;
1476 
1477 	nh_grp = mlxsw_sp_nexthop_group_find(mlxsw_sp, fi);
1478 	if (!nh_grp) {
1479 		nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi);
1480 		if (IS_ERR(nh_grp))
1481 			return PTR_ERR(nh_grp);
1482 	}
1483 	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
1484 	fib_entry->nh_group = nh_grp;
1485 	return 0;
1486 }
1487 
1488 static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp,
1489 				       struct mlxsw_sp_fib_entry *fib_entry)
1490 {
1491 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
1492 
1493 	list_del(&fib_entry->nexthop_group_node);
1494 	if (!list_empty(&nh_grp->fib_list))
1495 		return;
1496 	mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp);
1497 }
1498 
1499 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
1500 {
1501 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
1502 
1503 	mlxsw_reg_rgcr_pack(rgcr_pl, true);
1504 	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, MLXSW_SP_RIF_MAX);
1505 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
1506 }
1507 
1508 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
1509 {
1510 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
1511 
1512 	mlxsw_reg_rgcr_pack(rgcr_pl, false);
1513 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
1514 }
1515 
1516 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
1517 {
1518 	int err;
1519 
1520 	INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list);
1521 	INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_group_list);
1522 	err = __mlxsw_sp_router_init(mlxsw_sp);
1523 	if (err)
1524 		return err;
1525 	mlxsw_sp_lpm_init(mlxsw_sp);
1526 	mlxsw_sp_vrs_init(mlxsw_sp);
1527 	return mlxsw_sp_neigh_init(mlxsw_sp);
1528 }
1529 
/* Tear down the router part of @mlxsw_sp: neighbour handling first, then
 * the router register configuration.
 */
void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	/* Stops the delayed works and the netevent notifier. */
	mlxsw_sp_neigh_fini(mlxsw_sp);

	__mlxsw_sp_router_fini(mlxsw_sp);
}
1535 
1536 static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp,
1537 					 struct mlxsw_sp_fib_entry *fib_entry,
1538 					 enum mlxsw_reg_ralue_op op)
1539 {
1540 	char ralue_pl[MLXSW_REG_RALUE_LEN];
1541 	u32 *p_dip = (u32 *) fib_entry->key.addr;
1542 	struct mlxsw_sp_vr *vr = fib_entry->vr;
1543 	enum mlxsw_reg_ralue_trap_action trap_action;
1544 	u16 trap_id = 0;
1545 	u32 adjacency_index = 0;
1546 	u16 ecmp_size = 0;
1547 
1548 	/* In case the nexthop group adjacency index is valid, use it
1549 	 * with provided ECMP size. Otherwise, setup trap and pass
1550 	 * traffic to kernel.
1551 	 */
1552 	if (fib_entry->nh_group->adj_index_valid) {
1553 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
1554 		adjacency_index = fib_entry->nh_group->adj_index;
1555 		ecmp_size = fib_entry->nh_group->ecmp_size;
1556 	} else {
1557 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
1558 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
1559 	}
1560 
1561 	mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id,
1562 			      fib_entry->key.prefix_len, *p_dip);
1563 	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
1564 					adjacency_index, ecmp_size);
1565 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
1566 }
1567 
1568 static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp,
1569 					struct mlxsw_sp_fib_entry *fib_entry,
1570 					enum mlxsw_reg_ralue_op op)
1571 {
1572 	char ralue_pl[MLXSW_REG_RALUE_LEN];
1573 	u32 *p_dip = (u32 *) fib_entry->key.addr;
1574 	struct mlxsw_sp_vr *vr = fib_entry->vr;
1575 
1576 	mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id,
1577 			      fib_entry->key.prefix_len, *p_dip);
1578 	mlxsw_reg_ralue_act_local_pack(ralue_pl,
1579 				       MLXSW_REG_RALUE_TRAP_ACTION_NOP, 0,
1580 				       fib_entry->rif);
1581 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
1582 }
1583 
1584 static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp,
1585 				       struct mlxsw_sp_fib_entry *fib_entry,
1586 				       enum mlxsw_reg_ralue_op op)
1587 {
1588 	char ralue_pl[MLXSW_REG_RALUE_LEN];
1589 	u32 *p_dip = (u32 *) fib_entry->key.addr;
1590 	struct mlxsw_sp_vr *vr = fib_entry->vr;
1591 
1592 	mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id,
1593 			      fib_entry->key.prefix_len, *p_dip);
1594 	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
1595 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
1596 }
1597 
1598 static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp,
1599 				  struct mlxsw_sp_fib_entry *fib_entry,
1600 				  enum mlxsw_reg_ralue_op op)
1601 {
1602 	switch (fib_entry->type) {
1603 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
1604 		return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op);
1605 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
1606 		return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op);
1607 	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
1608 		return mlxsw_sp_fib_entry_op4_trap(mlxsw_sp, fib_entry, op);
1609 	}
1610 	return -EINVAL;
1611 }
1612 
1613 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
1614 				 struct mlxsw_sp_fib_entry *fib_entry,
1615 				 enum mlxsw_reg_ralue_op op)
1616 {
1617 	switch (fib_entry->vr->proto) {
1618 	case MLXSW_SP_L3_PROTO_IPV4:
1619 		return mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op);
1620 	case MLXSW_SP_L3_PROTO_IPV6:
1621 		return -EINVAL;
1622 	}
1623 	return -EINVAL;
1624 }
1625 
1626 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1627 				     struct mlxsw_sp_fib_entry *fib_entry)
1628 {
1629 	enum mlxsw_reg_ralue_op op;
1630 
1631 	op = !fib_entry->added ? MLXSW_REG_RALUE_OP_WRITE_WRITE :
1632 				 MLXSW_REG_RALUE_OP_WRITE_UPDATE;
1633 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
1634 }
1635 
1636 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
1637 				  struct mlxsw_sp_fib_entry *fib_entry)
1638 {
1639 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
1640 				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
1641 }
1642 
1643 struct mlxsw_sp_router_fib4_add_info {
1644 	struct switchdev_trans_item tritem;
1645 	struct mlxsw_sp *mlxsw_sp;
1646 	struct mlxsw_sp_fib_entry *fib_entry;
1647 };
1648 
1649 static void mlxsw_sp_router_fib4_add_info_destroy(void const *data)
1650 {
1651 	const struct mlxsw_sp_router_fib4_add_info *info = data;
1652 	struct mlxsw_sp_fib_entry *fib_entry = info->fib_entry;
1653 	struct mlxsw_sp *mlxsw_sp = info->mlxsw_sp;
1654 	struct mlxsw_sp_vr *vr = fib_entry->vr;
1655 
1656 	mlxsw_sp_fib_entry_destroy(fib_entry);
1657 	mlxsw_sp_vr_put(mlxsw_sp, vr);
1658 	kfree(info);
1659 }
1660 
1661 static int
1662 mlxsw_sp_router_fib4_entry_init(struct mlxsw_sp *mlxsw_sp,
1663 				const struct switchdev_obj_ipv4_fib *fib4,
1664 				struct mlxsw_sp_fib_entry *fib_entry)
1665 {
1666 	struct fib_info *fi = fib4->fi;
1667 
1668 	if (fib4->type == RTN_LOCAL || fib4->type == RTN_BROADCAST) {
1669 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1670 		return 0;
1671 	}
1672 	if (fib4->type != RTN_UNICAST)
1673 		return -EINVAL;
1674 
1675 	if (fi->fib_scope != RT_SCOPE_UNIVERSE) {
1676 		struct mlxsw_sp_rif *r;
1677 
1678 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
1679 		r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, fi->fib_dev);
1680 		if (!r)
1681 			return -EINVAL;
1682 		fib_entry->rif = r->rif;
1683 		return 0;
1684 	}
1685 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
1686 	return mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fi);
1687 }
1688 
1689 static void
1690 mlxsw_sp_router_fib4_entry_fini(struct mlxsw_sp *mlxsw_sp,
1691 				struct mlxsw_sp_fib_entry *fib_entry)
1692 {
1693 	if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_REMOTE)
1694 		return;
1695 	mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry);
1696 }
1697 
1698 static int
1699 mlxsw_sp_router_fib4_add_prepare(struct mlxsw_sp_port *mlxsw_sp_port,
1700 				 const struct switchdev_obj_ipv4_fib *fib4,
1701 				 struct switchdev_trans *trans)
1702 {
1703 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
1704 	struct mlxsw_sp_router_fib4_add_info *info;
1705 	struct mlxsw_sp_fib_entry *fib_entry;
1706 	struct mlxsw_sp_vr *vr;
1707 	int err;
1708 
1709 	vr = mlxsw_sp_vr_get(mlxsw_sp, fib4->dst_len, fib4->tb_id,
1710 			     MLXSW_SP_L3_PROTO_IPV4);
1711 	if (IS_ERR(vr))
1712 		return PTR_ERR(vr);
1713 
1714 	fib_entry = mlxsw_sp_fib_entry_create(vr->fib, &fib4->dst,
1715 					      sizeof(fib4->dst), fib4->dst_len);
1716 	if (!fib_entry) {
1717 		err = -ENOMEM;
1718 		goto err_fib_entry_create;
1719 	}
1720 	fib_entry->vr = vr;
1721 
1722 	err = mlxsw_sp_router_fib4_entry_init(mlxsw_sp, fib4, fib_entry);
1723 	if (err)
1724 		goto err_fib4_entry_init;
1725 
1726 	info = kmalloc(sizeof(*info), GFP_KERNEL);
1727 	if (!info) {
1728 		err = -ENOMEM;
1729 		goto err_alloc_info;
1730 	}
1731 	info->mlxsw_sp = mlxsw_sp;
1732 	info->fib_entry = fib_entry;
1733 	switchdev_trans_item_enqueue(trans, info,
1734 				     mlxsw_sp_router_fib4_add_info_destroy,
1735 				     &info->tritem);
1736 	return 0;
1737 
1738 err_alloc_info:
1739 	mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry);
1740 err_fib4_entry_init:
1741 	mlxsw_sp_fib_entry_destroy(fib_entry);
1742 err_fib_entry_create:
1743 	mlxsw_sp_vr_put(mlxsw_sp, vr);
1744 	return err;
1745 }
1746 
1747 static int
1748 mlxsw_sp_router_fib4_add_commit(struct mlxsw_sp_port *mlxsw_sp_port,
1749 				const struct switchdev_obj_ipv4_fib *fib4,
1750 				struct switchdev_trans *trans)
1751 {
1752 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
1753 	struct mlxsw_sp_router_fib4_add_info *info;
1754 	struct mlxsw_sp_fib_entry *fib_entry;
1755 	struct mlxsw_sp_vr *vr;
1756 	int err;
1757 
1758 	info = switchdev_trans_item_dequeue(trans);
1759 	fib_entry = info->fib_entry;
1760 	kfree(info);
1761 
1762 	vr = fib_entry->vr;
1763 	err = mlxsw_sp_fib_entry_insert(fib_entry->vr->fib, fib_entry);
1764 	if (err)
1765 		goto err_fib_entry_insert;
1766 	err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1767 	if (err)
1768 		goto err_fib_entry_add;
1769 	return 0;
1770 
1771 err_fib_entry_add:
1772 	mlxsw_sp_fib_entry_remove(vr->fib, fib_entry);
1773 err_fib_entry_insert:
1774 	mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry);
1775 	mlxsw_sp_fib_entry_destroy(fib_entry);
1776 	mlxsw_sp_vr_put(mlxsw_sp, vr);
1777 	return err;
1778 }
1779 
/* Add an IPv4 FIB entry as part of switchdev transaction @trans: runs the
 * prepare step in the prepare phase of the transaction and the commit
 * step otherwise.
 *
 * Returns the result of the step that was run.
 */
int mlxsw_sp_router_fib4_add(struct mlxsw_sp_port *mlxsw_sp_port,
			     const struct switchdev_obj_ipv4_fib *fib4,
			     struct switchdev_trans *trans)
{
	int err;

	if (switchdev_trans_ph_prepare(trans))
		err = mlxsw_sp_router_fib4_add_prepare(mlxsw_sp_port,
						       fib4, trans);
	else
		err = mlxsw_sp_router_fib4_add_commit(mlxsw_sp_port,
						      fib4, trans);

	return err;
}
1790 
1791 int mlxsw_sp_router_fib4_del(struct mlxsw_sp_port *mlxsw_sp_port,
1792 			     const struct switchdev_obj_ipv4_fib *fib4)
1793 {
1794 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
1795 	struct mlxsw_sp_fib_entry *fib_entry;
1796 	struct mlxsw_sp_vr *vr;
1797 
1798 	vr = mlxsw_sp_vr_find(mlxsw_sp, fib4->tb_id, MLXSW_SP_L3_PROTO_IPV4);
1799 	if (!vr) {
1800 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to find virtual router for FIB4 entry being removed.\n");
1801 		return -ENOENT;
1802 	}
1803 	fib_entry = mlxsw_sp_fib_entry_lookup(vr->fib, &fib4->dst,
1804 					      sizeof(fib4->dst), fib4->dst_len);
1805 	if (!fib_entry) {
1806 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to find FIB4 entry being removed.\n");
1807 		return -ENOENT;
1808 	}
1809 	mlxsw_sp_fib_entry_del(mlxsw_sp_port->mlxsw_sp, fib_entry);
1810 	mlxsw_sp_fib_entry_remove(vr->fib, fib_entry);
1811 	mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry);
1812 	mlxsw_sp_fib_entry_destroy(fib_entry);
1813 	mlxsw_sp_vr_put(mlxsw_sp, vr);
1814 	return 0;
1815 }
1816