/*
 * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
 * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
 * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
 * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/rhashtable.h>
#include <linux/bitops.h>
#include <linux/in6.h>
#include <linux/notifier.h>
#include <net/netevent.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/ip_fib.h>

#include "spectrum.h"
#include "core.h"
#include "reg.h"

#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)

static bool
mlxsw_sp_prefix_usage_subset(struct mlxsw_sp_prefix_usage *prefix_usage1,
			     struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	unsigned char prefix;

	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage1) {
		if (!test_bit(prefix, prefix_usage2->b))
			return false;
	}
	return true;
}

static bool
mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
			 struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}

static bool
mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage)
{
	struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 } };

	return mlxsw_sp_prefix_usage_eq(prefix_usage, &prefix_usage_none);
}

static void
mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
			  struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}

static void
mlxsw_sp_prefix_usage_zero(struct mlxsw_sp_prefix_usage *prefix_usage)
{
	memset(prefix_usage, 0, sizeof(*prefix_usage));
}

static void
mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
			  unsigned char prefix_len)
{
	set_bit(prefix_len, prefix_usage->b);
}

static void
mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
			    unsigned char prefix_len)
{
	clear_bit(prefix_len, prefix_usage->b);
}
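
/* Illustrative sketch of how the helpers above compose (not part of the
 * driver flow; do_something() is just a placeholder):
 *
 *	struct mlxsw_sp_prefix_usage usage = {{ 0 } };
 *	unsigned char prefix;
 *
 *	mlxsw_sp_prefix_usage_set(&usage, 24);
 *	mlxsw_sp_prefix_usage_set(&usage, 32);
 *	mlxsw_sp_prefix_usage_for_each(prefix, &usage)
 *		do_something(prefix);
 *
 * The loop body runs with prefix == 24 and then prefix == 32, since
 * for_each_set_bit() walks the bitmap in ascending order.
 */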

struct mlxsw_sp_fib_key {
	struct net_device *dev;
	unsigned char addr[sizeof(struct in6_addr)];
	unsigned char prefix_len;
};

enum mlxsw_sp_fib_entry_type {
	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
};

struct mlxsw_sp_nexthop_group;

struct mlxsw_sp_fib_entry {
	struct rhash_head ht_node;
	struct list_head list;
	struct mlxsw_sp_fib_key key;
	enum mlxsw_sp_fib_entry_type type;
	unsigned int ref_count;
	u16 rif; /* used for action local */
	struct mlxsw_sp_vr *vr;
	struct fib_info *fi;
	struct list_head nexthop_group_node;
	struct mlxsw_sp_nexthop_group *nh_group;
};

struct mlxsw_sp_fib {
	struct rhashtable ht;
	struct list_head entry_list;
	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
	struct mlxsw_sp_prefix_usage prefix_usage;
};

static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_fib_entry, key),
	.head_offset = offsetof(struct mlxsw_sp_fib_entry, ht_node),
	.key_len = sizeof(struct mlxsw_sp_fib_key),
	.automatic_shrinking = true,
};
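
/* The rhashtable hashes the key as raw bytes. An IPv4 address occupies
 * only the first four bytes of the IPv6-sized addr buffer, so the rest
 * of the key must stay zeroed for lookups to match insertions; hence
 * the kzalloc() in mlxsw_sp_fib_entry_create() and the memset() in
 * mlxsw_sp_fib_entry_lookup() below.
 */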

static int mlxsw_sp_fib_entry_insert(struct mlxsw_sp_fib *fib,
				     struct mlxsw_sp_fib_entry *fib_entry)
{
	unsigned char prefix_len = fib_entry->key.prefix_len;
	int err;

	err = rhashtable_insert_fast(&fib->ht, &fib_entry->ht_node,
				     mlxsw_sp_fib_ht_params);
	if (err)
		return err;
	list_add_tail(&fib_entry->list, &fib->entry_list);
	if (fib->prefix_ref_count[prefix_len]++ == 0)
		mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len);
	return 0;
}

static void mlxsw_sp_fib_entry_remove(struct mlxsw_sp_fib *fib,
				      struct mlxsw_sp_fib_entry *fib_entry)
{
	unsigned char prefix_len = fib_entry->key.prefix_len;

	if (--fib->prefix_ref_count[prefix_len] == 0)
		mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len);
	list_del(&fib_entry->list);
	rhashtable_remove_fast(&fib->ht, &fib_entry->ht_node,
			       mlxsw_sp_fib_ht_params);
}

static struct mlxsw_sp_fib_entry *
mlxsw_sp_fib_entry_create(struct mlxsw_sp_fib *fib, const void *addr,
			  size_t addr_len, unsigned char prefix_len,
			  struct net_device *dev)
{
	struct mlxsw_sp_fib_entry *fib_entry;

	fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL);
	if (!fib_entry)
		return NULL;
	fib_entry->key.dev = dev;
	memcpy(fib_entry->key.addr, addr, addr_len);
	fib_entry->key.prefix_len = prefix_len;
	return fib_entry;
}

static void mlxsw_sp_fib_entry_destroy(struct mlxsw_sp_fib_entry *fib_entry)
{
	kfree(fib_entry);
}

static struct mlxsw_sp_fib_entry *
mlxsw_sp_fib_entry_lookup(struct mlxsw_sp_fib *fib, const void *addr,
			  size_t addr_len, unsigned char prefix_len,
			  struct net_device *dev)
{
	struct mlxsw_sp_fib_key key;

	memset(&key, 0, sizeof(key));
	key.dev = dev;
	memcpy(key.addr, addr, addr_len);
	key.prefix_len = prefix_len;
	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
}

static struct mlxsw_sp_fib *mlxsw_sp_fib_create(void)
{
	struct mlxsw_sp_fib *fib;
	int err;

	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
	if (!fib)
		return ERR_PTR(-ENOMEM);
	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
	if (err)
		goto err_rhashtable_init;
	INIT_LIST_HEAD(&fib->entry_list);
	return fib;

err_rhashtable_init:
	kfree(fib);
	return ERR_PTR(err);
}

static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib)
{
	rhashtable_destroy(&fib->ht);
	kfree(fib);
}

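/* Find an unused LPM tree. When one_reserved is set, the first unused
 * tree found is skipped, keeping one tree in reserve; the apparent
 * intent is that virtual router creation (which passes true) leaves a
 * free tree behind so that a later tree replacement in
 * mlxsw_sp_vr_lpm_tree_check() can still succeed.
 */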
static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp, bool one_reserved)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int i;

	for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
		lpm_tree = &mlxsw_sp->router.lpm_trees[i];
		if (lpm_tree->ref_count == 0) {
			if (one_reserved)
				one_reserved = false;
			else
				return lpm_tree;
		}
	}
	return NULL;
}

static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];

	mlxsw_reg_ralta_pack(ralta_pl, true,
			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
			     lpm_tree->id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}

static int mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];

	mlxsw_reg_ralta_pack(ralta_pl, false,
			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
			     lpm_tree->id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}

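/* Program the tree structure into the device: the root bin is the
 * longest used prefix and the remaining bins are chained below it in
 * ascending prefix order. Illustrative example for a prefix usage of
 * {/8, /24}: root_bin is 24, bin 8 is packed with no child and bin 24
 * is packed with bin 8 as its child; prefix length 0 is skipped in the
 * chain.
 */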
static int
mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_prefix_usage *prefix_usage,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralst_pl[MLXSW_REG_RALST_LEN];
	u8 root_bin = 0;
	u8 prefix;
	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;

	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
		root_bin = prefix;

	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
		if (prefix == 0)
			continue;
		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
					 MLXSW_REG_RALST_BIN_NO_CHILD);
		last_prefix = prefix;
	}
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
}

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
			 struct mlxsw_sp_prefix_usage *prefix_usage,
			 enum mlxsw_sp_l3proto proto, bool one_reserved)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int err;

	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp, one_reserved);
	if (!lpm_tree)
		return ERR_PTR(-EBUSY);
	lpm_tree->proto = proto;
	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
	if (err)
		return ERR_PTR(err);

	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
						lpm_tree);
	if (err)
		goto err_left_struct_set;
	memcpy(&lpm_tree->prefix_usage, prefix_usage,
	       sizeof(lpm_tree->prefix_usage));
	return lpm_tree;

err_left_struct_set:
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
	return ERR_PTR(err);
}

static int mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_lpm_tree *lpm_tree)
{
	return mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
		      struct mlxsw_sp_prefix_usage *prefix_usage,
		      enum mlxsw_sp_l3proto proto, bool one_reserved)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int i;

	for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
		lpm_tree = &mlxsw_sp->router.lpm_trees[i];
		if (lpm_tree->ref_count != 0 &&
		    lpm_tree->proto == proto &&
		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
					     prefix_usage))
			goto inc_ref_count;
	}
	lpm_tree = mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage,
					    proto, one_reserved);
	if (IS_ERR(lpm_tree))
		return lpm_tree;

inc_ref_count:
	lpm_tree->ref_count++;
	return lpm_tree;
}

static int mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_lpm_tree *lpm_tree)
{
	if (--lpm_tree->ref_count == 0)
		return mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
	return 0;
}

static void mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int i;

	for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
		lpm_tree = &mlxsw_sp->router.lpm_trees[i];
		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
	}
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_resources *resources;
	struct mlxsw_sp_vr *vr;
	int i;

	resources = mlxsw_core_resources_get(mlxsw_sp->core);
	for (i = 0; i < resources->max_virtual_routers; i++) {
		vr = &mlxsw_sp->router.vrs[i];
		if (!vr->used)
			return vr;
	}
	return NULL;
}

static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_vr *vr)
{
	char raltb_pl[MLXSW_REG_RALTB_LEN];

	mlxsw_reg_raltb_pack(raltb_pl, vr->id,
			     (enum mlxsw_reg_ralxx_protocol) vr->proto,
			     vr->lpm_tree->id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}

static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_vr *vr)
{
	char raltb_pl[MLXSW_REG_RALTB_LEN];

	/* Bind to tree 0 which is default */
	mlxsw_reg_raltb_pack(raltb_pl, vr->id,
			     (enum mlxsw_reg_ralxx_protocol) vr->proto, 0);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}

static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
{
	/* For our purpose, squash main and local table into one */
	if (tb_id == RT_TABLE_LOCAL)
		tb_id = RT_TABLE_MAIN;
	return tb_id;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
					    u32 tb_id,
					    enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_resources *resources;
	struct mlxsw_sp_vr *vr;
	int i;

	tb_id = mlxsw_sp_fix_tb_id(tb_id);

	resources = mlxsw_core_resources_get(mlxsw_sp->core);
	for (i = 0; i < resources->max_virtual_routers; i++) {
		vr = &mlxsw_sp->router.vrs[i];
		if (vr->used && vr->proto == proto && vr->tb_id == tb_id)
			return vr;
	}
	return NULL;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
					      unsigned char prefix_len,
					      u32 tb_id,
					      enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_prefix_usage req_prefix_usage;
	struct mlxsw_sp_lpm_tree *lpm_tree;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
	if (!vr)
		return ERR_PTR(-EBUSY);
	vr->fib = mlxsw_sp_fib_create();
	if (IS_ERR(vr->fib))
		return ERR_CAST(vr->fib);

	vr->proto = proto;
	vr->tb_id = tb_id;
	mlxsw_sp_prefix_usage_zero(&req_prefix_usage);
	mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 proto, true);
	if (IS_ERR(lpm_tree)) {
		err = PTR_ERR(lpm_tree);
		goto err_tree_get;
	}
	vr->lpm_tree = lpm_tree;
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
	if (err)
		goto err_tree_bind;

	vr->used = true;
	return vr;

err_tree_bind:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
err_tree_get:
	mlxsw_sp_fib_destroy(vr->fib);

	return ERR_PTR(err);
}

static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_vr *vr)
{
	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr);
	mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
	mlxsw_sp_fib_destroy(vr->fib);
	vr->used = false;
}

static int
mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
			   struct mlxsw_sp_prefix_usage *req_prefix_usage)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;

	if (mlxsw_sp_prefix_usage_eq(req_prefix_usage,
				     &vr->lpm_tree->prefix_usage))
		return 0;

	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage,
					 vr->proto, false);
	if (IS_ERR(lpm_tree)) {
		/* We failed to get a tree according to the required
		 * prefix usage. However, the current tree might still be
		 * good for us if our requirement is a subset of the
		 * prefixes used in the tree.
		 */
		if (mlxsw_sp_prefix_usage_subset(req_prefix_usage,
						 &vr->lpm_tree->prefix_usage))
			return 0;
		return PTR_ERR(lpm_tree);
	}

	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr);
	mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
	vr->lpm_tree = lpm_tree;
	return mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp,
					   unsigned char prefix_len,
					   u32 tb_id,
					   enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_vr *vr;
	int err;

	tb_id = mlxsw_sp_fix_tb_id(tb_id);
	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id, proto);
	if (!vr) {
		vr = mlxsw_sp_vr_create(mlxsw_sp, prefix_len, tb_id, proto);
		if (IS_ERR(vr))
			return vr;
	} else {
		struct mlxsw_sp_prefix_usage req_prefix_usage;

		mlxsw_sp_prefix_usage_cpy(&req_prefix_usage,
					  &vr->fib->prefix_usage);
		mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
		/* Need to replace LPM tree in case new prefix is required. */
		err = mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
						 &req_prefix_usage);
		if (err)
			return ERR_PTR(err);
	}
	return vr;
}

static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
{
	/* Destroy the virtual router entity in case the associated FIB is
	 * empty and allow it to be used for other tables in future.
	 * Otherwise, check if some prefix usage disappeared and change the
	 * tree if that is the case. Note that in case a new, smaller tree
	 * cannot be allocated, the original one will be kept being used.
	 */
	if (mlxsw_sp_prefix_usage_none(&vr->fib->prefix_usage))
		mlxsw_sp_vr_destroy(mlxsw_sp, vr);
	else
		mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
					   &vr->fib->prefix_usage);
}

static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_resources *resources;
	struct mlxsw_sp_vr *vr;
	int i;

	resources = mlxsw_core_resources_get(mlxsw_sp->core);
	if (!resources->max_virtual_routers_valid)
		return -EIO;

	mlxsw_sp->router.vrs = kcalloc(resources->max_virtual_routers,
				       sizeof(struct mlxsw_sp_vr),
				       GFP_KERNEL);
	if (!mlxsw_sp->router.vrs)
		return -ENOMEM;

	for (i = 0; i < resources->max_virtual_routers; i++) {
		vr = &mlxsw_sp->router.vrs[i];
		vr->id = i;
	}

	return 0;
}

static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
{
	kfree(mlxsw_sp->router.vrs);
}

struct mlxsw_sp_neigh_key {
	unsigned char addr[sizeof(struct in6_addr)];
	struct net_device *dev;
};

struct mlxsw_sp_neigh_entry {
	struct rhash_head ht_node;
	struct mlxsw_sp_neigh_key key;
	u16 rif;
	struct neighbour *n;
	bool offloaded;
	struct delayed_work dw;
	struct mlxsw_sp_port *mlxsw_sp_port;
	unsigned char ha[ETH_ALEN];
	struct list_head nexthop_list; /* list of nexthops using
					* this neigh entry
					*/
	struct list_head nexthop_neighs_list_node;
};

static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
	.key_len = sizeof(struct mlxsw_sp_neigh_key),
};

static int
mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return rhashtable_insert_fast(&mlxsw_sp->router.neigh_ht,
				      &neigh_entry->ht_node,
				      mlxsw_sp_neigh_ht_params);
}

static void
mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	rhashtable_remove_fast(&mlxsw_sp->router.neigh_ht,
			       &neigh_entry->ht_node,
			       mlxsw_sp_neigh_ht_params);
}

static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work);

static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_create(const void *addr, size_t addr_len,
			    struct net_device *dev, u16 rif,
			    struct neighbour *n)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;

	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_ATOMIC);
	if (!neigh_entry)
		return NULL;
	memcpy(neigh_entry->key.addr, addr, addr_len);
	neigh_entry->key.dev = dev;
	neigh_entry->rif = rif;
	neigh_entry->n = n;
	INIT_DELAYED_WORK(&neigh_entry->dw, mlxsw_sp_router_neigh_update_hw);
	INIT_LIST_HEAD(&neigh_entry->nexthop_list);
	return neigh_entry;
}

static void
mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	kfree(neigh_entry);
}

static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, const void *addr,
			    size_t addr_len, struct net_device *dev)
{
	struct mlxsw_sp_neigh_key key = {{ 0 } };

	memcpy(key.addr, addr, addr_len);
	key.dev = dev;
	return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht,
				      &key, mlxsw_sp_neigh_ht_params);
}

int mlxsw_sp_router_neigh_construct(struct net_device *dev,
				    struct neighbour *n)
{
	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp_rif *r;
	u32 dip;
	int err;

	if (n->tbl != &arp_tbl)
		return 0;

	dip = ntohl(*((__be32 *) n->primary_key));
	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &dip, sizeof(dip),
						  n->dev);
	if (neigh_entry) {
		WARN_ON(neigh_entry->n != n);
		return 0;
	}

	r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
	if (WARN_ON(!r))
		return -EINVAL;

	neigh_entry = mlxsw_sp_neigh_entry_create(&dip, sizeof(dip), n->dev,
						  r->rif, n);
	if (!neigh_entry)
		return -ENOMEM;
	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
	if (err)
		goto err_neigh_entry_insert;
	return 0;

err_neigh_entry_insert:
	mlxsw_sp_neigh_entry_destroy(neigh_entry);
	return err;
}

void mlxsw_sp_router_neigh_destroy(struct net_device *dev,
				   struct neighbour *n)
{
	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
	struct mlxsw_sp_neigh_entry *neigh_entry;
	u32 dip;

	if (n->tbl != &arp_tbl)
		return;

	dip = ntohl(*((__be32 *) n->primary_key));
	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &dip, sizeof(dip),
						  n->dev);
	if (!neigh_entry)
		return;
	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_destroy(neigh_entry);
}

static void
mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
{
	unsigned long interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);

	mlxsw_sp->router.neighs_update.interval = jiffies_to_msecs(interval);
}

static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int ent_index)
{
	struct net_device *dev;
	struct neighbour *n;
	__be32 dipn;
	u32 dip;
	u16 rif;

	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);

	if (!mlxsw_sp->rifs[rif]) {
		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
		return;
	}

	dipn = htonl(dip);
	dev = mlxsw_sp->rifs[rif]->dev;
	n = neigh_lookup(&arp_tbl, &dipn, dev);
	if (!n) {
		netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n",
			   &dip);
		return;
	}

	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
	neigh_event_send(n, NULL);
	neigh_release(n);
}

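/* A RAUHTD response carries several records, each holding up to
 * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC IPv4 entries, so entry indexes are
 * flat across the whole response. For example, assuming four entries
 * per record, record 2 covers entry indexes 8 through 11.
 */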
static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
	u8 num_entries;
	int i;

	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
								rec_index);
	/* Hardware starts counting at 0, so add 1. */
	num_entries++;

	/* Each record consists of several neighbour entries. */
	for (i = 0; i < num_entries; i++) {
		int ent_index;

		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
						       ent_index);
	}
}

static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
					      char *rauhtd_pl, int rec_index)
{
	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
	case MLXSW_REG_RAUHTD_TYPE_IPV4:
		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
						       rec_index);
		break;
	case MLXSW_REG_RAUHTD_TYPE_IPV6:
		WARN_ON_ONCE(1);
		break;
	}
}

static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
{
	char *rauhtd_pl;
	u8 num_rec;
	int i, err;

	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
	if (!rauhtd_pl)
		return -ENOMEM;

	/* Make sure the neighbour's netdev isn't removed in the
	 * process.
	 */
	rtnl_lock();
	do {
		mlxsw_reg_rauhtd_pack(rauhtd_pl, MLXSW_REG_RAUHTD_TYPE_IPV4);
		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
				      rauhtd_pl);
		if (err) {
			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
			break;
		}
		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
		for (i = 0; i < num_rec; i++)
			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
							  i);
	} while (num_rec);
	rtnl_unlock();

	kfree(rauhtd_pl);
	return err;
}

static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;

	/* Take the RTNL mutex here to prevent the lists from changing. */
	rtnl_lock();
	list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
			    nexthop_neighs_list_node) {
		/* If this neigh has nexthops, make the kernel think it is
		 * active regardless of the traffic.
		 */
		if (!list_empty(&neigh_entry->nexthop_list))
			neigh_event_send(neigh_entry->n, NULL);
	}
	rtnl_unlock();
}

static void
mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
{
	unsigned long interval = mlxsw_sp->router.neighs_update.interval;

	mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw,
			       msecs_to_jiffies(interval));
}

static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
{
	struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
						 router.neighs_update.dw.work);
	int err;

	err = mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp);
	if (err)
		dev_err(mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity\n");

	mlxsw_sp_router_neighs_update_nh(mlxsw_sp);

	mlxsw_sp_router_neighs_update_work_schedule(mlxsw_sp);
}

static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
						 router.nexthop_probe_dw.work);

	/* Iterate over nexthop neighbours, find those who are unresolved and
	 * send ARP requests for them. This solves the chicken-and-egg problem
	 * where a nexthop is not offloaded until its neighbour is resolved,
	 * but the neighbour is never resolved because traffic is flowing in
	 * HW using a different nexthop.
	 *
	 * Take the RTNL mutex here to prevent the lists from changing.
	 */
	rtnl_lock();
	list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
			    nexthop_neighs_list_node) {
		if (!(neigh_entry->n->nud_state & NUD_VALID) &&
		    !list_empty(&neigh_entry->nexthop_list))
			neigh_event_send(neigh_entry->n, NULL);
	}
	rtnl_unlock();

	mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw,
			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
}

static void
mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
			      struct mlxsw_sp_neigh_entry *neigh_entry,
			      bool removing);

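/* Reflect a neighbour state change in the device. The neighbour state
 * is sampled once under the neigh lock and classified into exactly one
 * transition: adding (not yet offloaded and now valid), updating
 * (offloaded and still valid, e.g. a changed MAC) or removing
 * (offloaded but no longer valid).
 */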
static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work)
{
	struct mlxsw_sp_neigh_entry *neigh_entry =
		container_of(work, struct mlxsw_sp_neigh_entry, dw.work);
	struct neighbour *n = neigh_entry->n;
	struct mlxsw_sp_port *mlxsw_sp_port = neigh_entry->mlxsw_sp_port;
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
	char rauht_pl[MLXSW_REG_RAUHT_LEN];
	struct net_device *dev;
	bool entry_connected;
	u8 nud_state;
	bool updating;
	bool removing;
	bool adding;
	u32 dip;
	int err;

	read_lock_bh(&n->lock);
	dip = ntohl(*((__be32 *) n->primary_key));
	memcpy(neigh_entry->ha, n->ha, sizeof(neigh_entry->ha));
	nud_state = n->nud_state;
	dev = n->dev;
	read_unlock_bh(&n->lock);

	entry_connected = nud_state & NUD_VALID;
	adding = (!neigh_entry->offloaded) && entry_connected;
	updating = neigh_entry->offloaded && entry_connected;
	removing = neigh_entry->offloaded && !entry_connected;

	if (adding || updating) {
		mlxsw_reg_rauht_pack4(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_ADD,
				      neigh_entry->rif,
				      neigh_entry->ha, dip);
		err = mlxsw_reg_write(mlxsw_sp->core,
				      MLXSW_REG(rauht), rauht_pl);
		if (err) {
			netdev_err(dev, "Could not add neigh %pI4h\n", &dip);
			neigh_entry->offloaded = false;
		} else {
			neigh_entry->offloaded = true;
		}
		mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, false);
	} else if (removing) {
		mlxsw_reg_rauht_pack4(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE,
				      neigh_entry->rif,
				      neigh_entry->ha, dip);
		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht),
				      rauht_pl);
		if (err) {
			netdev_err(dev, "Could not delete neigh %pI4h\n", &dip);
			neigh_entry->offloaded = true;
		} else {
			neigh_entry->offloaded = false;
		}
		mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, true);
	}

	neigh_release(n);
	mlxsw_sp_port_dev_put(mlxsw_sp_port);
}

int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
				   unsigned long event, void *ptr)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp_port *mlxsw_sp_port;
	struct mlxsw_sp *mlxsw_sp;
	unsigned long interval;
	struct net_device *dev;
	struct neigh_parms *p;
	struct neighbour *n;
	u32 dip;

	switch (event) {
	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
		p = ptr;

		/* We don't care about changes in the default table. */
		if (!p->dev || p->tbl != &arp_tbl)
			return NOTIFY_DONE;

		/* We are in atomic context and can't take RTNL mutex,
		 * so use RCU variant to walk the device chain.
		 */
		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
		if (!mlxsw_sp_port)
			return NOTIFY_DONE;

		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
		mlxsw_sp->router.neighs_update.interval = interval;

		mlxsw_sp_port_dev_put(mlxsw_sp_port);
		break;
	case NETEVENT_NEIGH_UPDATE:
		n = ptr;
		dev = n->dev;

		if (n->tbl != &arp_tbl)
			return NOTIFY_DONE;

		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(dev);
		if (!mlxsw_sp_port)
			return NOTIFY_DONE;

		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
		dip = ntohl(*((__be32 *) n->primary_key));
		neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp,
							  &dip,
							  sizeof(__be32),
							  dev);
		if (WARN_ON(!neigh_entry) || WARN_ON(neigh_entry->n != n)) {
			mlxsw_sp_port_dev_put(mlxsw_sp_port);
			return NOTIFY_DONE;
		}
		neigh_entry->mlxsw_sp_port = mlxsw_sp_port;

		/* Take a reference to ensure the neighbour won't be
		 * destructed until we drop the reference in delayed
		 * work.
		 */
		neigh_clone(n);
		if (!mlxsw_core_schedule_dw(&neigh_entry->dw, 0)) {
			neigh_release(n);
			mlxsw_sp_port_dev_put(mlxsw_sp_port);
		}
		break;
	}

	return NOTIFY_DONE;
}

static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
{
	int err;

	err = rhashtable_init(&mlxsw_sp->router.neigh_ht,
			      &mlxsw_sp_neigh_ht_params);
	if (err)
		return err;

	/* Initialize the polling interval according to the default
	 * table.
	 */
	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);

	/* Create the delayed works for the neighbour activity update
	 * and the unresolved nexthop probing.
	 */
	INIT_DELAYED_WORK(&mlxsw_sp->router.neighs_update.dw,
			  mlxsw_sp_router_neighs_update_work);
	INIT_DELAYED_WORK(&mlxsw_sp->router.nexthop_probe_dw,
			  mlxsw_sp_router_probe_unresolved_nexthops);
	mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, 0);
	mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, 0);
	return 0;
}

static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
{
	cancel_delayed_work_sync(&mlxsw_sp->router.neighs_update.dw);
	cancel_delayed_work_sync(&mlxsw_sp->router.nexthop_probe_dw);
	rhashtable_destroy(&mlxsw_sp->router.neigh_ht);
}

struct mlxsw_sp_nexthop {
	struct list_head neigh_list_node; /* member of neigh entry list */
	struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
						* this belongs to
						*/
	u8 should_offload:1, /* set indicates this neigh is connected and
			      * should be put to KVD linear area of this group.
			      */
	   offloaded:1, /* set in case the neigh is actually put into
			 * KVD linear area of this group.
			 */
	   update:1; /* set indicates that MAC of this neigh should be
		      * updated in HW
		      */
	struct mlxsw_sp_neigh_entry *neigh_entry;
};

struct mlxsw_sp_nexthop_group {
	struct list_head list; /* node in mlxsw->router.nexthop_group_list */
	struct list_head fib_list; /* list of fib entries that use this group */
	u8 adj_index_valid:1;
	u32 adj_index;
	u16 ecmp_size;
	u16 count;
	struct mlxsw_sp_nexthop nexthops[0];
};

static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
					     struct mlxsw_sp_vr *vr,
					     u32 adj_index, u16 ecmp_size,
					     u32 new_adj_index,
					     u16 new_ecmp_size)
{
	char raleu_pl[MLXSW_REG_RALEU_LEN];

	mlxsw_reg_raleu_pack(raleu_pl,
			     (enum mlxsw_reg_ralxx_protocol) vr->proto, vr->id,
			     adj_index, ecmp_size, new_adj_index,
			     new_ecmp_size);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
}

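/* Re-point all FIB entries using this nexthop group from the old
 * adjacency block to the new one. RALEU operates per virtual router,
 * so consecutive FIB entries sharing a VR are updated only once.
 */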
static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_nexthop_group *nh_grp,
					  u32 old_adj_index, u16 old_ecmp_size)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_vr *vr = NULL;
	int err;

	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
		if (vr == fib_entry->vr)
			continue;
		vr = fib_entry->vr;
		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr,
							old_adj_index,
							old_ecmp_size,
							nh_grp->adj_index,
							nh_grp->ecmp_size);
		if (err)
			return err;
	}
	return 0;
}

static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
				       struct mlxsw_sp_nexthop *nh)
{
	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
	char ratr_pl[MLXSW_REG_RATR_LEN];

	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
			    true, adj_index, neigh_entry->rif);
	mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
}

static int
mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_nexthop_group *nh_grp)
{
	u32 adj_index = nh_grp->adj_index; /* base */
	struct mlxsw_sp_nexthop *nh;
	int i;
	int err;

	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];

		if (!nh->should_offload) {
			nh->offloaded = 0;
			continue;
		}

		if (nh->update) {
			err = mlxsw_sp_nexthop_mac_update(mlxsw_sp,
							  adj_index, nh);
			if (err)
				return err;
			nh->update = 0;
			nh->offloaded = 1;
		}
		adj_index++;
	}
	return 0;
}

static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_entry *fib_entry);

static int
mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	int err;

	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
		if (err)
			return err;
	}
	return 0;
}

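/* Rebuild the group's adjacency entries after a nexthop change. The
 * flow is roughly: count the nexthops that should be offloaded,
 * allocate a fresh KVD linear block of that size, write the neighbour
 * MACs into it, re-point the using FIB entries (or rewrite them with
 * the new adjacency index if the group was previously trapping) and
 * free the old block. On any failure, fall back to trapping the
 * group's traffic to the kernel.
 */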
static void
mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop *nh;
	bool offload_change = false;
	u32 adj_index;
	u16 ecmp_size = 0;
	bool old_adj_index_valid;
	u32 old_adj_index;
	u16 old_ecmp_size;
	int ret;
	int i;
	int err;

	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];

		if (nh->should_offload ^ nh->offloaded) {
			offload_change = true;
			if (nh->should_offload)
				nh->update = 1;
		}
		if (nh->should_offload)
			ecmp_size++;
	}
	if (!offload_change) {
		/* Nothing was added or removed, so no need to reallocate. Just
		 * update MAC on existing adjacency indexes.
		 */
		err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
			goto set_trap;
		}
		return;
	}
	if (!ecmp_size)
		/* No neigh of this group is connected so we just set
		 * the trap and let everything flow through the kernel.
		 */
		goto set_trap;

	ret = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size);
	if (ret < 0) {
		/* We ran out of KVD linear space, just set the
		 * trap and let everything flow through the kernel.
		 */
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
		goto set_trap;
	}
	adj_index = ret;
	old_adj_index_valid = nh_grp->adj_index_valid;
	old_adj_index = nh_grp->adj_index;
	old_ecmp_size = nh_grp->ecmp_size;
	nh_grp->adj_index_valid = 1;
	nh_grp->adj_index = adj_index;
	nh_grp->ecmp_size = ecmp_size;
	err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
		goto set_trap;
	}

	if (!old_adj_index_valid) {
		/* The trap was set for fib entries, so we have to call
		 * fib entry update to unset it and use adjacency index.
		 */
		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
			goto set_trap;
		}
		return;
	}

	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
					     old_adj_index, old_ecmp_size);
	mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
		goto set_trap;
	}
	return;

set_trap:
	old_adj_index_valid = nh_grp->adj_index_valid;
	nh_grp->adj_index_valid = 0;
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		nh->offloaded = 0;
	}
	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
	if (err)
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
	if (old_adj_index_valid)
		mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
}

static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
					    bool removing)
{
	if (!removing && !nh->should_offload)
		nh->should_offload = 1;
	else if (removing && nh->offloaded)
		nh->should_offload = 0;
	nh->update = 1;
}

static void
mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
			      struct mlxsw_sp_neigh_entry *neigh_entry,
			      bool removing)
{
	struct mlxsw_sp_nexthop *nh;

	/* Take the RTNL mutex here to prevent the lists from changing. */
	rtnl_lock();
	list_for_each_entry(nh, &neigh_entry->nexthop_list,
			    neigh_list_node) {
		__mlxsw_sp_nexthop_neigh_update(nh, removing);
		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
	}
	rtnl_unlock();
}

static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_nexthop_group *nh_grp,
				 struct mlxsw_sp_nexthop *nh,
				 struct fib_nh *fib_nh)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	u32 gwip = ntohl(fib_nh->nh_gw);
	struct net_device *dev = fib_nh->nh_dev;
	struct neighbour *n;
	u8 nud_state;

	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &gwip,
						  sizeof(gwip), dev);
	if (!neigh_entry) {
		__be32 gwipn = htonl(gwip);

		n = neigh_create(&arp_tbl, &gwipn, dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
		neigh_event_send(n, NULL);
		neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &gwip,
							  sizeof(gwip), dev);
		if (!neigh_entry) {
			neigh_release(n);
			return -EINVAL;
		}
	} else {
		/* Take a reference of the neigh here, ensuring that it would
		 * not be destructed before the nexthop entry is finished.
		 * The other branch takes the reference in neigh_create().
		 */
		n = neigh_entry->n;
		neigh_clone(n);
	}

	/* If that is the first nexthop connected to that neigh, add to
	 * nexthop_neighs_list
	 */
	if (list_empty(&neigh_entry->nexthop_list))
		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
			      &mlxsw_sp->router.nexthop_neighs_list);

	nh->nh_grp = nh_grp;
	nh->neigh_entry = neigh_entry;
	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
	read_lock_bh(&n->lock);
	nud_state = n->nud_state;
	read_unlock_bh(&n->lock);
	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID));

	return 0;
}

static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_nexthop *nh)
{
	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;

	list_del(&nh->neigh_list_node);

	/* If that is the last nexthop connected to that neigh, remove from
	 * nexthop_neighs_list
	 */
	if (list_empty(&nh->neigh_entry->nexthop_list))
		list_del(&nh->neigh_entry->nexthop_neighs_list_node);

	neigh_release(neigh_entry->n);
}

static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
{
	struct mlxsw_sp_nexthop_group *nh_grp;
	struct mlxsw_sp_nexthop *nh;
	struct fib_nh *fib_nh;
	size_t alloc_size;
	int i;
	int err;

	alloc_size = sizeof(*nh_grp) +
		     fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
	if (!nh_grp)
		return ERR_PTR(-ENOMEM);
	INIT_LIST_HEAD(&nh_grp->fib_list);
	nh_grp->count = fi->fib_nhs;
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		fib_nh = &fi->fib_nh[i];
		err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh);
		if (err)
			goto err_nexthop_init;
	}
	list_add_tail(&nh_grp->list, &mlxsw_sp->router.nexthop_group_list);
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	return nh_grp;

err_nexthop_init:
	/* Roll back the nexthops that were already initialized, not the
	 * one that just failed.
	 */
	for (i--; i >= 0; i--) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
	}
	kfree(nh_grp);
	return ERR_PTR(err);
}

static void
mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop *nh;
	int i;

	list_del(&nh_grp->list);
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
	}
	kfree(nh_grp);
}

static bool mlxsw_sp_nexthop_match(struct mlxsw_sp_nexthop *nh,
				   struct fib_info *fi)
{
	int i;

	for (i = 0; i < fi->fib_nhs; i++) {
		struct fib_nh *fib_nh = &fi->fib_nh[i];
		u32 gwip = ntohl(fib_nh->nh_gw);

		if (memcmp(nh->neigh_entry->key.addr,
			   &gwip, sizeof(u32)) == 0 &&
		    nh->neigh_entry->key.dev == fib_nh->nh_dev)
			return true;
	}
	return false;
}

static bool mlxsw_sp_nexthop_group_match(struct mlxsw_sp_nexthop_group *nh_grp,
					 struct fib_info *fi)
{
	int i;

	if (nh_grp->count != fi->fib_nhs)
		return false;
	for (i = 0; i < nh_grp->count; i++) {
		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];

		if (!mlxsw_sp_nexthop_match(nh, fi))
			return false;
	}
	return true;
}

static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop_group_find(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
{
	struct mlxsw_sp_nexthop_group *nh_grp;

	list_for_each_entry(nh_grp, &mlxsw_sp->router.nexthop_group_list,
			    list) {
		if (mlxsw_sp_nexthop_group_match(nh_grp, fi))
			return nh_grp;
	}
	return NULL;
}

static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_fib_entry *fib_entry,
				      struct fib_info *fi)
{
	struct mlxsw_sp_nexthop_group *nh_grp;

	nh_grp = mlxsw_sp_nexthop_group_find(mlxsw_sp, fi);
	if (!nh_grp) {
		nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi);
		if (IS_ERR(nh_grp))
			return PTR_ERR(nh_grp);
	}
	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
	fib_entry->nh_group = nh_grp;
	return 0;
}

static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;

	list_del(&fib_entry->nexthop_group_node);
	if (!list_empty(&nh_grp->fib_list))
		return;
	mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp);
}

static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib_entry *fib_entry,
					 enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	u32 *p_dip = (u32 *) fib_entry->key.addr;
	struct mlxsw_sp_vr *vr = fib_entry->vr;
	enum mlxsw_reg_ralue_trap_action trap_action;
	u16 trap_id = 0;
	u32 adjacency_index = 0;
	u16 ecmp_size = 0;

	/* In case the nexthop group adjacency index is valid, use it
	 * with the provided ECMP size. Otherwise, set up a trap and pass
	 * traffic to the kernel.
	 */
	if (fib_entry->nh_group->adj_index_valid) {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
		adjacency_index = fib_entry->nh_group->adj_index;
		ecmp_size = fib_entry->nh_group->ecmp_size;
	} else {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
	}

	mlxsw_reg_ralue_pack4(ralue_pl,
			      (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
			      vr->id, fib_entry->key.prefix_len, *p_dip);
	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
					adjacency_index, ecmp_size);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}

static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry,
					enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	u32 *p_dip = (u32 *) fib_entry->key.addr;
	struct mlxsw_sp_vr *vr = fib_entry->vr;

	mlxsw_reg_ralue_pack4(ralue_pl,
			      (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
			      vr->id, fib_entry->key.prefix_len, *p_dip);
	mlxsw_reg_ralue_act_local_pack(ralue_pl,
				       MLXSW_REG_RALUE_TRAP_ACTION_NOP, 0,
				       fib_entry->rif);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}

static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry *fib_entry,
				       enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	u32 *p_dip = (u32 *) fib_entry->key.addr;
	struct mlxsw_sp_vr *vr = fib_entry->vr;

	mlxsw_reg_ralue_pack4(ralue_pl,
			      (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
			      vr->id, fib_entry->key.prefix_len, *p_dip);
	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}

static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_entry *fib_entry,
				  enum mlxsw_reg_ralue_op op)
{
	switch (fib_entry->type) {
	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
		return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
		return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
		return mlxsw_sp_fib_entry_op4_trap(mlxsw_sp, fib_entry, op);
	}
	return -EINVAL;
}

static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib_entry *fib_entry,
				 enum mlxsw_reg_ralue_op op)
{
	switch (fib_entry->vr->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		return mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op);
	case MLXSW_SP_L3_PROTO_IPV6:
		return -EINVAL;
	}
	return -EINVAL;
}

static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_entry *fib_entry)
{
	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
}

static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_entry *fib_entry)
{
	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
}

static int
mlxsw_sp_router_fib4_entry_init(struct mlxsw_sp *mlxsw_sp,
				const struct fib_entry_notifier_info *fen_info,
				struct mlxsw_sp_fib_entry *fib_entry)
{
	struct fib_info *fi = fen_info->fi;
	struct mlxsw_sp_rif *r = NULL;
	int nhsel;
	int err;

	if (fen_info->type == RTN_LOCAL || fen_info->type == RTN_BROADCAST) {
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
		return 0;
	}
	if (fen_info->type != RTN_UNICAST)
		return -EINVAL;

	for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
		const struct fib_nh *nh = &fi->fib_nh[nhsel];

		if (!nh->nh_dev)
			continue;
		r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, nh->nh_dev);
		if (!r) {
			/* In case a router interface is not found for
			 * at least one of the nexthops, that means
			 * the nexthop points to some device unrelated
			 * to us. Set the trap and pass the packets for
			 * this prefix to the kernel.
			 */
			break;
		}
	}

	if (!r) {
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
		return 0;
	}

	if (fi->fib_scope != RT_SCOPE_UNIVERSE) {
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
		fib_entry->rif = r->rif;
	} else {
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
		err = mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fi);
		if (err)
			return err;
	}
	fib_info_offload_inc(fen_info->fi);
	return 0;
}

static void
mlxsw_sp_router_fib4_entry_fini(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib_entry *fib_entry)
{
	if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
		fib_info_offload_dec(fib_entry->fi);
	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_REMOTE)
		mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry);
}

1702 static struct mlxsw_sp_fib_entry *
1703 mlxsw_sp_fib_entry_get(struct mlxsw_sp *mlxsw_sp,
1704 		       const struct fib_entry_notifier_info *fen_info)
1705 {
1706 	struct mlxsw_sp_fib_entry *fib_entry;
1707 	struct fib_info *fi = fen_info->fi;
1708 	struct mlxsw_sp_vr *vr;
1709 	int err;
1710 
1711 	vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->dst_len, fen_info->tb_id,
1712 			     MLXSW_SP_L3_PROTO_IPV4);
1713 	if (IS_ERR(vr))
1714 		return ERR_CAST(vr);
1715 
1716 	fib_entry = mlxsw_sp_fib_entry_lookup(vr->fib, &fen_info->dst,
1717 					      sizeof(fen_info->dst),
1718 					      fen_info->dst_len, fi->fib_dev);
1719 	if (fib_entry) {
1720 		/* Already exists, just take a reference */
1721 		fib_entry->ref_count++;
1722 		return fib_entry;
1723 	}
1724 	fib_entry = mlxsw_sp_fib_entry_create(vr->fib, &fen_info->dst,
1725 					      sizeof(fen_info->dst),
1726 					      fen_info->dst_len, fi->fib_dev);
1727 	if (!fib_entry) {
1728 		err = -ENOMEM;
1729 		goto err_fib_entry_create;
1730 	}
1731 	fib_entry->vr = vr;
1732 	fib_entry->fi = fi;
1733 	fib_entry->ref_count = 1;
1734 
1735 	err = mlxsw_sp_router_fib4_entry_init(mlxsw_sp, fen_info, fib_entry);
1736 	if (err)
1737 		goto err_fib4_entry_init;
1738 
1739 	return fib_entry;
1740 
1741 err_fib4_entry_init:
1742 	mlxsw_sp_fib_entry_destroy(fib_entry);
1743 err_fib_entry_create:
1744 	mlxsw_sp_vr_put(mlxsw_sp, vr);
1745 
1746 	return ERR_PTR(err);
1747 }
1748 
1749 static struct mlxsw_sp_fib_entry *
1750 mlxsw_sp_fib_entry_find(struct mlxsw_sp *mlxsw_sp,
1751 			const struct fib_entry_notifier_info *fen_info)
1752 {
1753 	struct mlxsw_sp_vr *vr;
1754 
1755 	vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id,
1756 			      MLXSW_SP_L3_PROTO_IPV4);
1757 	if (!vr)
1758 		return NULL;
1759 
1760 	return mlxsw_sp_fib_entry_lookup(vr->fib, &fen_info->dst,
1761 					 sizeof(fen_info->dst),
1762 					 fen_info->dst_len,
1763 					 fen_info->fi->fib_dev);
1764 }
1765 
static void mlxsw_sp_fib_entry_put(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_vr *vr = fib_entry->vr;

	if (--fib_entry->ref_count == 0) {
		mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry);
		mlxsw_sp_fib_entry_destroy(fib_entry);
	}
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}

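/* Drop every reference the entry still holds. Used on the abort
 * path, where entries are flushed without going through individual
 * FIB deletion events. ref_count is sampled before each put because
 * the final put frees the entry.
 */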
static void mlxsw_sp_fib_entry_put_all(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry *fib_entry)
{
	unsigned int last_ref_count;

	do {
		last_ref_count = fib_entry->ref_count;
		mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry);
	} while (last_ref_count != 1);
}

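/* Reflect a FIB_EVENT_ENTRY_ADD in the device: take (or create) the
 * FIB entry and program it into the hardware LPM table when it is
 * first used.
 */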
static int mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
				    struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_vr *vr;
	int err;

	if (mlxsw_sp->router.aborted)
		return 0;

	fib_entry = mlxsw_sp_fib_entry_get(mlxsw_sp, fen_info);
	if (IS_ERR(fib_entry)) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB4 entry being added.\n");
		return PTR_ERR(fib_entry);
	}

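	/* An identical entry is already programmed in hardware; the
	 * route being added only took a reference on it.
	 */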
	if (fib_entry->ref_count != 1)
		return 0;

	vr = fib_entry->vr;
	err = mlxsw_sp_fib_entry_insert(vr->fib, fib_entry);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to insert FIB4 entry being added.\n");
		goto err_fib_entry_insert;
	}
	err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
	if (err)
		goto err_fib_entry_add;
	return 0;

err_fib_entry_add:
	mlxsw_sp_fib_entry_remove(vr->fib, fib_entry);
err_fib_entry_insert:
	mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry);
	return err;
}

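/* Reflect a FIB_EVENT_ENTRY_DEL in the device: remove the entry from
 * the hardware table when its last user goes away and drop the
 * reference taken at add time.
 */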
static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
				     struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib_entry *fib_entry;

	if (mlxsw_sp->router.aborted)
		return;

	fib_entry = mlxsw_sp_fib_entry_find(mlxsw_sp, fen_info);
	if (!fib_entry)
		return;

	if (fib_entry->ref_count == 1) {
		mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
		mlxsw_sp_fib_entry_remove(fib_entry->vr->fib, fib_entry);
	}

	mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry);
}

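/* Program a catch-all route that traps all routed packets to the
 * CPU: allocate the minimal LPM tree (RALTA), define its prefix
 * structure (RALST), bind virtual router 0 to the tree (RALTB) and
 * write a 0.0.0.0/0 entry whose action is ip2me, i.e. trap to CPU
 * (RALUE).
 */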
static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];
	char ralst_pl[MLXSW_REG_RALST_LEN];
	char raltb_pl[MLXSW_REG_RALTB_LEN];
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	int err;

	mlxsw_reg_ralta_pack(ralta_pl, true, MLXSW_REG_RALXX_PROTOCOL_IPV4,
			     MLXSW_SP_LPM_TREE_MIN);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
	if (err)
		return err;

	mlxsw_reg_ralst_pack(ralst_pl, 0xff, MLXSW_SP_LPM_TREE_MIN);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
	if (err)
		return err;

	mlxsw_reg_raltb_pack(raltb_pl, 0, MLXSW_REG_RALXX_PROTOCOL_IPV4,
			     MLXSW_SP_LPM_TREE_MIN);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
	if (err)
		return err;

	mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4,
			      MLXSW_REG_RALUE_OP_WRITE_WRITE, 0, 0, 0);
	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}

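/* Route reflection failed: flush all entries from every in-use
 * virtual router, mark the router as aborted so further FIB events
 * are ignored, and fall back to trapping all routed traffic to the
 * CPU.
 */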
static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_resources *resources;
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_fib_entry *tmp;
	struct mlxsw_sp_vr *vr;
	int i;
	int err;

	resources = mlxsw_core_resources_get(mlxsw_sp->core);
	for (i = 0; i < resources->max_virtual_routers; i++) {
		vr = &mlxsw_sp->router.vrs[i];
		if (!vr->used)
			continue;

		list_for_each_entry_safe(fib_entry, tmp,
					 &vr->fib->entry_list, list) {
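			/* Putting the last entry may free vr->fib
			 * itself, so test for the list tail before
			 * the put and break out instead of letting
			 * the iterator touch the freed list head.
			 */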
			bool do_break = &tmp->list == &vr->fib->entry_list;

			mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
			mlxsw_sp_fib_entry_remove(fib_entry->vr->fib,
						  fib_entry);
			mlxsw_sp_fib_entry_put_all(mlxsw_sp, fib_entry);
			if (do_break)
				break;
		}
	}
	mlxsw_sp->router.aborted = true;
	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
	if (err)
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
}

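/* Basic router initialization: allocate the router interface (RIF)
 * table and enable routing in the device through the router general
 * configuration register (RGCR).
 */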
static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_resources *resources;
	char rgcr_pl[MLXSW_REG_RGCR_LEN];
	int err;

	resources = mlxsw_core_resources_get(mlxsw_sp->core);
	if (!resources->max_rif_valid)
		return -EIO;

	mlxsw_sp->rifs = kcalloc(resources->max_rif,
				 sizeof(struct mlxsw_sp_rif *), GFP_KERNEL);
	if (!mlxsw_sp->rifs)
		return -ENOMEM;

	mlxsw_reg_rgcr_pack(rgcr_pl, true);
	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, resources->max_rif);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
	if (err)
		goto err_rgcr_fail;

	return 0;

err_rgcr_fail:
	kfree(mlxsw_sp->rifs);
	return err;
}

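/* Disable routing in the device and free the RIF table, warning if
 * any router interface was left behind at teardown time.
 */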
static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_resources *resources;
	char rgcr_pl[MLXSW_REG_RGCR_LEN];
	int i;

	mlxsw_reg_rgcr_pack(rgcr_pl, false);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);

	resources = mlxsw_core_resources_get(mlxsw_sp->core);
	for (i = 0; i < resources->max_rif; i++)
		WARN_ON_ONCE(mlxsw_sp->rifs[i]);

	kfree(mlxsw_sp->rifs);
}

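/* Called for IPv4 route and rule notifications. A route we fail to
 * reflect, as well as any FIB rule (which the device cannot
 * offload), puts the router into abort mode.
 */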
static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
				     unsigned long event, void *ptr)
{
	struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
	struct fib_entry_notifier_info *fen_info = ptr;
	int err;

	switch (event) {
	case FIB_EVENT_ENTRY_ADD:
		err = mlxsw_sp_router_fib4_add(mlxsw_sp, fen_info);
		if (err)
			mlxsw_sp_router_fib4_abort(mlxsw_sp);
		break;
	case FIB_EVENT_ENTRY_DEL:
		mlxsw_sp_router_fib4_del(mlxsw_sp, fen_info);
		break;
	case FIB_EVENT_RULE_ADD: /* fall through */
	case FIB_EVENT_RULE_DEL:
		mlxsw_sp_router_fib4_abort(mlxsw_sp);
		break;
	}
	return NOTIFY_DONE;
}

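/* Bring up the routing subsystem: RIFs, LPM trees, virtual routers
 * and neighbour handling, then subscribe to FIB notifications.
 */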
int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
	int err;

	INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list);
	INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_group_list);
	err = __mlxsw_sp_router_init(mlxsw_sp);
	if (err)
		return err;

	mlxsw_sp_lpm_init(mlxsw_sp);
	err = mlxsw_sp_vrs_init(mlxsw_sp);
	if (err)
		goto err_vrs_init;

	err = mlxsw_sp_neigh_init(mlxsw_sp);
	if (err)
		goto err_neigh_init;

	mlxsw_sp->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
	register_fib_notifier(&mlxsw_sp->fib_nb);
	return 0;

err_neigh_init:
	mlxsw_sp_vrs_fini(mlxsw_sp);
err_vrs_init:
	__mlxsw_sp_router_fini(mlxsw_sp);
	return err;
}

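/* Tear down the routing subsystem in the reverse order of
 * mlxsw_sp_router_init().
 */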
void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	unregister_fib_notifier(&mlxsw_sp->fib_nb);
	mlxsw_sp_neigh_fini(mlxsw_sp);
	mlxsw_sp_vrs_fini(mlxsw_sp);
	__mlxsw_sp_router_fini(mlxsw_sp);
}