1 /*
2  * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3  * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
4  * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
5  * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
6  * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the names of the copyright holders nor the names of its
17  *    contributors may be used to endorse or promote products derived from
18  *    this software without specific prior written permission.
19  *
20  * Alternatively, this software may be distributed under the terms of the
21  * GNU General Public License ("GPL") version 2 as published by the Free
22  * Software Foundation.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 
37 #include <linux/kernel.h>
38 #include <linux/types.h>
39 #include <linux/rhashtable.h>
40 #include <linux/bitops.h>
41 #include <linux/in6.h>
42 #include <linux/notifier.h>
43 #include <net/netevent.h>
44 #include <net/neighbour.h>
45 #include <net/arp.h>
46 #include <net/ip_fib.h>
47 
48 #include "spectrum.h"
49 #include "core.h"
50 #include "reg.h"
51 
52 #define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
53 	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
54 
55 static bool
56 mlxsw_sp_prefix_usage_subset(struct mlxsw_sp_prefix_usage *prefix_usage1,
57 			     struct mlxsw_sp_prefix_usage *prefix_usage2)
58 {
59 	unsigned char prefix;
60 
61 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage1) {
62 		if (!test_bit(prefix, prefix_usage2->b))
63 			return false;
64 	}
65 	return true;
66 }
67 
68 static bool
69 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
70 			 struct mlxsw_sp_prefix_usage *prefix_usage2)
71 {
72 	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
73 }
74 
75 static bool
76 mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage)
77 {
78 	struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 } };
79 
80 	return mlxsw_sp_prefix_usage_eq(prefix_usage, &prefix_usage_none);
81 }
82 
83 static void
84 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
85 			  struct mlxsw_sp_prefix_usage *prefix_usage2)
86 {
87 	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
88 }
89 
90 static void
91 mlxsw_sp_prefix_usage_zero(struct mlxsw_sp_prefix_usage *prefix_usage)
92 {
93 	memset(prefix_usage, 0, sizeof(*prefix_usage));
94 }
95 
96 static void
97 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
98 			  unsigned char prefix_len)
99 {
100 	set_bit(prefix_len, prefix_usage->b);
101 }
102 
103 static void
104 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
105 			    unsigned char prefix_len)
106 {
107 	clear_bit(prefix_len, prefix_usage->b);
108 }
109 
110 struct mlxsw_sp_fib_key {
111 	struct net_device *dev;
112 	unsigned char addr[sizeof(struct in6_addr)];
113 	unsigned char prefix_len;
114 };
115 
116 enum mlxsw_sp_fib_entry_type {
117 	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
118 	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
119 	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
120 };
121 
122 struct mlxsw_sp_nexthop_group;
123 
124 struct mlxsw_sp_fib_entry {
125 	struct rhash_head ht_node;
126 	struct list_head list;
127 	struct mlxsw_sp_fib_key key;
128 	enum mlxsw_sp_fib_entry_type type;
129 	unsigned int ref_count;
130 	u16 rif; /* used for action local */
131 	struct mlxsw_sp_vr *vr;
132 	struct fib_info *fi;
133 	struct list_head nexthop_group_node;
134 	struct mlxsw_sp_nexthop_group *nh_group;
135 };
136 
137 struct mlxsw_sp_fib {
138 	struct rhashtable ht;
139 	struct list_head entry_list;
140 	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
141 	struct mlxsw_sp_prefix_usage prefix_usage;
142 };
143 
144 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
145 	.key_offset = offsetof(struct mlxsw_sp_fib_entry, key),
146 	.head_offset = offsetof(struct mlxsw_sp_fib_entry, ht_node),
147 	.key_len = sizeof(struct mlxsw_sp_fib_key),
148 	.automatic_shrinking = true,
149 };
150 
151 static int mlxsw_sp_fib_entry_insert(struct mlxsw_sp_fib *fib,
152 				     struct mlxsw_sp_fib_entry *fib_entry)
153 {
154 	unsigned char prefix_len = fib_entry->key.prefix_len;
155 	int err;
156 
157 	err = rhashtable_insert_fast(&fib->ht, &fib_entry->ht_node,
158 				     mlxsw_sp_fib_ht_params);
159 	if (err)
160 		return err;
161 	list_add_tail(&fib_entry->list, &fib->entry_list);
162 	if (fib->prefix_ref_count[prefix_len]++ == 0)
163 		mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len);
164 	return 0;
165 }
166 
167 static void mlxsw_sp_fib_entry_remove(struct mlxsw_sp_fib *fib,
168 				      struct mlxsw_sp_fib_entry *fib_entry)
169 {
170 	unsigned char prefix_len = fib_entry->key.prefix_len;
171 
172 	if (--fib->prefix_ref_count[prefix_len] == 0)
173 		mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len);
174 	list_del(&fib_entry->list);
175 	rhashtable_remove_fast(&fib->ht, &fib_entry->ht_node,
176 			       mlxsw_sp_fib_ht_params);
177 }
178 
179 static struct mlxsw_sp_fib_entry *
180 mlxsw_sp_fib_entry_create(struct mlxsw_sp_fib *fib, const void *addr,
181 			  size_t addr_len, unsigned char prefix_len,
182 			  struct net_device *dev)
183 {
184 	struct mlxsw_sp_fib_entry *fib_entry;
185 
186 	fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL);
187 	if (!fib_entry)
188 		return NULL;
189 	fib_entry->key.dev = dev;
190 	memcpy(fib_entry->key.addr, addr, addr_len);
191 	fib_entry->key.prefix_len = prefix_len;
192 	return fib_entry;
193 }
194 
195 static void mlxsw_sp_fib_entry_destroy(struct mlxsw_sp_fib_entry *fib_entry)
196 {
197 	kfree(fib_entry);
198 }
199 
200 static struct mlxsw_sp_fib_entry *
201 mlxsw_sp_fib_entry_lookup(struct mlxsw_sp_fib *fib, const void *addr,
202 			  size_t addr_len, unsigned char prefix_len,
203 			  struct net_device *dev)
204 {
205 	struct mlxsw_sp_fib_key key;
206 
207 	memset(&key, 0, sizeof(key));
208 	key.dev = dev;
209 	memcpy(key.addr, addr, addr_len);
210 	key.prefix_len = prefix_len;
211 	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
212 }
213 
214 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(void)
215 {
216 	struct mlxsw_sp_fib *fib;
217 	int err;
218 
219 	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
220 	if (!fib)
221 		return ERR_PTR(-ENOMEM);
222 	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
223 	if (err)
224 		goto err_rhashtable_init;
225 	INIT_LIST_HEAD(&fib->entry_list);
226 	return fib;
227 
228 err_rhashtable_init:
229 	kfree(fib);
230 	return ERR_PTR(err);
231 }
232 
233 static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib)
234 {
235 	rhashtable_destroy(&fib->ht);
236 	kfree(fib);
237 }
238 
239 static struct mlxsw_sp_lpm_tree *
240 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp, bool one_reserved)
241 {
242 	static struct mlxsw_sp_lpm_tree *lpm_tree;
243 	int i;
244 
245 	for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
246 		lpm_tree = &mlxsw_sp->router.lpm_trees[i];
247 		if (lpm_tree->ref_count == 0) {
248 			if (one_reserved)
249 				one_reserved = false;
250 			else
251 				return lpm_tree;
252 		}
253 	}
254 	return NULL;
255 }
256 
257 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
258 				   struct mlxsw_sp_lpm_tree *lpm_tree)
259 {
260 	char ralta_pl[MLXSW_REG_RALTA_LEN];
261 
262 	mlxsw_reg_ralta_pack(ralta_pl, true,
263 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
264 			     lpm_tree->id);
265 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
266 }
267 
268 static int mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
269 				  struct mlxsw_sp_lpm_tree *lpm_tree)
270 {
271 	char ralta_pl[MLXSW_REG_RALTA_LEN];
272 
273 	mlxsw_reg_ralta_pack(ralta_pl, false,
274 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
275 			     lpm_tree->id);
276 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
277 }
278 
279 static int
280 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
281 				  struct mlxsw_sp_prefix_usage *prefix_usage,
282 				  struct mlxsw_sp_lpm_tree *lpm_tree)
283 {
284 	char ralst_pl[MLXSW_REG_RALST_LEN];
285 	u8 root_bin = 0;
286 	u8 prefix;
287 	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
288 
289 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
290 		root_bin = prefix;
291 
292 	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
293 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
294 		if (prefix == 0)
295 			continue;
296 		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
297 					 MLXSW_REG_RALST_BIN_NO_CHILD);
298 		last_prefix = prefix;
299 	}
300 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
301 }
302 
303 static struct mlxsw_sp_lpm_tree *
304 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
305 			 struct mlxsw_sp_prefix_usage *prefix_usage,
306 			 enum mlxsw_sp_l3proto proto, bool one_reserved)
307 {
308 	struct mlxsw_sp_lpm_tree *lpm_tree;
309 	int err;
310 
311 	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp, one_reserved);
312 	if (!lpm_tree)
313 		return ERR_PTR(-EBUSY);
314 	lpm_tree->proto = proto;
315 	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
316 	if (err)
317 		return ERR_PTR(err);
318 
319 	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
320 						lpm_tree);
321 	if (err)
322 		goto err_left_struct_set;
323 	memcpy(&lpm_tree->prefix_usage, prefix_usage,
324 	       sizeof(lpm_tree->prefix_usage));
325 	return lpm_tree;
326 
327 err_left_struct_set:
328 	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
329 	return ERR_PTR(err);
330 }
331 
332 static int mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
333 				     struct mlxsw_sp_lpm_tree *lpm_tree)
334 {
335 	return mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
336 }
337 
338 static struct mlxsw_sp_lpm_tree *
339 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
340 		      struct mlxsw_sp_prefix_usage *prefix_usage,
341 		      enum mlxsw_sp_l3proto proto, bool one_reserved)
342 {
343 	struct mlxsw_sp_lpm_tree *lpm_tree;
344 	int i;
345 
346 	for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
347 		lpm_tree = &mlxsw_sp->router.lpm_trees[i];
348 		if (lpm_tree->ref_count != 0 &&
349 		    lpm_tree->proto == proto &&
350 		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
351 					     prefix_usage))
352 			goto inc_ref_count;
353 	}
354 	lpm_tree = mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage,
355 					    proto, one_reserved);
356 	if (IS_ERR(lpm_tree))
357 		return lpm_tree;
358 
359 inc_ref_count:
360 	lpm_tree->ref_count++;
361 	return lpm_tree;
362 }
363 
364 static int mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
365 				 struct mlxsw_sp_lpm_tree *lpm_tree)
366 {
367 	if (--lpm_tree->ref_count == 0)
368 		return mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
369 	return 0;
370 }
371 
372 static void mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
373 {
374 	struct mlxsw_sp_lpm_tree *lpm_tree;
375 	int i;
376 
377 	for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
378 		lpm_tree = &mlxsw_sp->router.lpm_trees[i];
379 		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
380 	}
381 }
382 
383 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
384 {
385 	struct mlxsw_sp_vr *vr;
386 	int i;
387 
388 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
389 		vr = &mlxsw_sp->router.vrs[i];
390 		if (!vr->used)
391 			return vr;
392 	}
393 	return NULL;
394 }
395 
396 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
397 				     struct mlxsw_sp_vr *vr)
398 {
399 	char raltb_pl[MLXSW_REG_RALTB_LEN];
400 
401 	mlxsw_reg_raltb_pack(raltb_pl, vr->id,
402 			     (enum mlxsw_reg_ralxx_protocol) vr->proto,
403 			     vr->lpm_tree->id);
404 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
405 }
406 
407 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
408 				       struct mlxsw_sp_vr *vr)
409 {
410 	char raltb_pl[MLXSW_REG_RALTB_LEN];
411 
412 	/* Bind to tree 0 which is default */
413 	mlxsw_reg_raltb_pack(raltb_pl, vr->id,
414 			     (enum mlxsw_reg_ralxx_protocol) vr->proto, 0);
415 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
416 }
417 
418 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
419 {
420 	/* For our purpose, squash main and local table into one */
421 	if (tb_id == RT_TABLE_LOCAL)
422 		tb_id = RT_TABLE_MAIN;
423 	return tb_id;
424 }
425 
426 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
427 					    u32 tb_id,
428 					    enum mlxsw_sp_l3proto proto)
429 {
430 	struct mlxsw_sp_vr *vr;
431 	int i;
432 
433 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
434 
435 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
436 		vr = &mlxsw_sp->router.vrs[i];
437 		if (vr->used && vr->proto == proto && vr->tb_id == tb_id)
438 			return vr;
439 	}
440 	return NULL;
441 }
442 
443 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
444 					      unsigned char prefix_len,
445 					      u32 tb_id,
446 					      enum mlxsw_sp_l3proto proto)
447 {
448 	struct mlxsw_sp_prefix_usage req_prefix_usage;
449 	struct mlxsw_sp_lpm_tree *lpm_tree;
450 	struct mlxsw_sp_vr *vr;
451 	int err;
452 
453 	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
454 	if (!vr)
455 		return ERR_PTR(-EBUSY);
456 	vr->fib = mlxsw_sp_fib_create();
457 	if (IS_ERR(vr->fib))
458 		return ERR_CAST(vr->fib);
459 
460 	vr->proto = proto;
461 	vr->tb_id = tb_id;
462 	mlxsw_sp_prefix_usage_zero(&req_prefix_usage);
463 	mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
464 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
465 					 proto, true);
466 	if (IS_ERR(lpm_tree)) {
467 		err = PTR_ERR(lpm_tree);
468 		goto err_tree_get;
469 	}
470 	vr->lpm_tree = lpm_tree;
471 	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
472 	if (err)
473 		goto err_tree_bind;
474 
475 	vr->used = true;
476 	return vr;
477 
478 err_tree_bind:
479 	mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
480 err_tree_get:
481 	mlxsw_sp_fib_destroy(vr->fib);
482 
483 	return ERR_PTR(err);
484 }
485 
486 static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
487 				struct mlxsw_sp_vr *vr)
488 {
489 	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr);
490 	mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
491 	mlxsw_sp_fib_destroy(vr->fib);
492 	vr->used = false;
493 }
494 
495 static int
496 mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
497 			   struct mlxsw_sp_prefix_usage *req_prefix_usage)
498 {
499 	struct mlxsw_sp_lpm_tree *lpm_tree;
500 
501 	if (mlxsw_sp_prefix_usage_eq(req_prefix_usage,
502 				     &vr->lpm_tree->prefix_usage))
503 		return 0;
504 
505 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage,
506 					 vr->proto, false);
507 	if (IS_ERR(lpm_tree)) {
508 		/* We failed to get a tree according to the required
509 		 * prefix usage. However, the current tree might be still good
510 		 * for us if our requirement is subset of the prefixes used
511 		 * in the tree.
512 		 */
513 		if (mlxsw_sp_prefix_usage_subset(req_prefix_usage,
514 						 &vr->lpm_tree->prefix_usage))
515 			return 0;
516 		return PTR_ERR(lpm_tree);
517 	}
518 
519 	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr);
520 	mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
521 	vr->lpm_tree = lpm_tree;
522 	return mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
523 }
524 
525 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp,
526 					   unsigned char prefix_len,
527 					   u32 tb_id,
528 					   enum mlxsw_sp_l3proto proto)
529 {
530 	struct mlxsw_sp_vr *vr;
531 	int err;
532 
533 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
534 	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id, proto);
535 	if (!vr) {
536 		vr = mlxsw_sp_vr_create(mlxsw_sp, prefix_len, tb_id, proto);
537 		if (IS_ERR(vr))
538 			return vr;
539 	} else {
540 		struct mlxsw_sp_prefix_usage req_prefix_usage;
541 
542 		mlxsw_sp_prefix_usage_cpy(&req_prefix_usage,
543 					  &vr->fib->prefix_usage);
544 		mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
545 		/* Need to replace LPM tree in case new prefix is required. */
546 		err = mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
547 						 &req_prefix_usage);
548 		if (err)
549 			return ERR_PTR(err);
550 	}
551 	return vr;
552 }
553 
554 static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
555 {
556 	/* Destroy virtual router entity in case the associated FIB is empty
557 	 * and allow it to be used for other tables in future. Otherwise,
558 	 * check if some prefix usage did not disappear and change tree if
559 	 * that is the case. Note that in case new, smaller tree cannot be
560 	 * allocated, the original one will be kept being used.
561 	 */
562 	if (mlxsw_sp_prefix_usage_none(&vr->fib->prefix_usage))
563 		mlxsw_sp_vr_destroy(mlxsw_sp, vr);
564 	else
565 		mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
566 					   &vr->fib->prefix_usage);
567 }
568 
569 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
570 {
571 	struct mlxsw_sp_vr *vr;
572 	u64 max_vrs;
573 	int i;
574 
575 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
576 		return -EIO;
577 
578 	max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
579 	mlxsw_sp->router.vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
580 				       GFP_KERNEL);
581 	if (!mlxsw_sp->router.vrs)
582 		return -ENOMEM;
583 
584 	for (i = 0; i < max_vrs; i++) {
585 		vr = &mlxsw_sp->router.vrs[i];
586 		vr->id = i;
587 	}
588 
589 	return 0;
590 }
591 
592 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
593 
594 static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
595 {
596 	/* At this stage we're guaranteed not to have new incoming
597 	 * FIB notifications and the work queue is free from FIBs
598 	 * sitting on top of mlxsw netdevs. However, we can still
599 	 * have other FIBs queued. Flush the queue before flushing
600 	 * the device's tables. No need for locks, as we're the only
601 	 * writer.
602 	 */
603 	mlxsw_core_flush_owq();
604 	mlxsw_sp_router_fib_flush(mlxsw_sp);
605 	kfree(mlxsw_sp->router.vrs);
606 }
607 
608 struct mlxsw_sp_neigh_key {
609 	struct neighbour *n;
610 };
611 
612 struct mlxsw_sp_neigh_entry {
613 	struct rhash_head ht_node;
614 	struct mlxsw_sp_neigh_key key;
615 	u16 rif;
616 	bool offloaded;
617 	struct delayed_work dw;
618 	struct mlxsw_sp_port *mlxsw_sp_port;
619 	unsigned char ha[ETH_ALEN];
620 	struct list_head nexthop_list; /* list of nexthops using
621 					* this neigh entry
622 					*/
623 	struct list_head nexthop_neighs_list_node;
624 };
625 
626 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
627 	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
628 	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
629 	.key_len = sizeof(struct mlxsw_sp_neigh_key),
630 };
631 
632 static int
633 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
634 			    struct mlxsw_sp_neigh_entry *neigh_entry)
635 {
636 	return rhashtable_insert_fast(&mlxsw_sp->router.neigh_ht,
637 				      &neigh_entry->ht_node,
638 				      mlxsw_sp_neigh_ht_params);
639 }
640 
641 static void
642 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
643 			    struct mlxsw_sp_neigh_entry *neigh_entry)
644 {
645 	rhashtable_remove_fast(&mlxsw_sp->router.neigh_ht,
646 			       &neigh_entry->ht_node,
647 			       mlxsw_sp_neigh_ht_params);
648 }
649 
650 static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work);
651 
652 static struct mlxsw_sp_neigh_entry *
653 mlxsw_sp_neigh_entry_create(struct neighbour *n, u16 rif)
654 {
655 	struct mlxsw_sp_neigh_entry *neigh_entry;
656 
657 	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_ATOMIC);
658 	if (!neigh_entry)
659 		return NULL;
660 	neigh_entry->key.n = n;
661 	neigh_entry->rif = rif;
662 	INIT_DELAYED_WORK(&neigh_entry->dw, mlxsw_sp_router_neigh_update_hw);
663 	INIT_LIST_HEAD(&neigh_entry->nexthop_list);
664 	return neigh_entry;
665 }
666 
667 static void
668 mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp_neigh_entry *neigh_entry)
669 {
670 	kfree(neigh_entry);
671 }
672 
673 static struct mlxsw_sp_neigh_entry *
674 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
675 {
676 	struct mlxsw_sp_neigh_key key;
677 
678 	key.n = n;
679 	return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht,
680 				      &key, mlxsw_sp_neigh_ht_params);
681 }
682 
683 int mlxsw_sp_router_neigh_construct(struct net_device *dev,
684 				    struct neighbour *n)
685 {
686 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
687 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
688 	struct mlxsw_sp_neigh_entry *neigh_entry;
689 	struct mlxsw_sp_rif *r;
690 	int err;
691 
692 	if (n->tbl != &arp_tbl)
693 		return 0;
694 
695 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
696 	if (neigh_entry)
697 		return 0;
698 
699 	r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
700 	if (WARN_ON(!r))
701 		return -EINVAL;
702 
703 	neigh_entry = mlxsw_sp_neigh_entry_create(n, r->rif);
704 	if (!neigh_entry)
705 		return -ENOMEM;
706 	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
707 	if (err)
708 		goto err_neigh_entry_insert;
709 	return 0;
710 
711 err_neigh_entry_insert:
712 	mlxsw_sp_neigh_entry_destroy(neigh_entry);
713 	return err;
714 }
715 
716 void mlxsw_sp_router_neigh_destroy(struct net_device *dev,
717 				   struct neighbour *n)
718 {
719 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
720 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
721 	struct mlxsw_sp_neigh_entry *neigh_entry;
722 
723 	if (n->tbl != &arp_tbl)
724 		return;
725 
726 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
727 	if (!neigh_entry)
728 		return;
729 	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
730 	mlxsw_sp_neigh_entry_destroy(neigh_entry);
731 }
732 
733 static void
734 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
735 {
736 	unsigned long interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
737 
738 	mlxsw_sp->router.neighs_update.interval = jiffies_to_msecs(interval);
739 }
740 
741 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
742 						   char *rauhtd_pl,
743 						   int ent_index)
744 {
745 	struct net_device *dev;
746 	struct neighbour *n;
747 	__be32 dipn;
748 	u32 dip;
749 	u16 rif;
750 
751 	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
752 
753 	if (!mlxsw_sp->rifs[rif]) {
754 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
755 		return;
756 	}
757 
758 	dipn = htonl(dip);
759 	dev = mlxsw_sp->rifs[rif]->dev;
760 	n = neigh_lookup(&arp_tbl, &dipn, dev);
761 	if (!n) {
762 		netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n",
763 			   &dip);
764 		return;
765 	}
766 
767 	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
768 	neigh_event_send(n, NULL);
769 	neigh_release(n);
770 }
771 
772 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
773 						   char *rauhtd_pl,
774 						   int rec_index)
775 {
776 	u8 num_entries;
777 	int i;
778 
779 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
780 								rec_index);
781 	/* Hardware starts counting at 0, so add 1. */
782 	num_entries++;
783 
784 	/* Each record consists of several neighbour entries. */
785 	for (i = 0; i < num_entries; i++) {
786 		int ent_index;
787 
788 		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
789 		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
790 						       ent_index);
791 	}
792 
793 }
794 
795 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
796 					      char *rauhtd_pl, int rec_index)
797 {
798 	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
799 	case MLXSW_REG_RAUHTD_TYPE_IPV4:
800 		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
801 						       rec_index);
802 		break;
803 	case MLXSW_REG_RAUHTD_TYPE_IPV6:
804 		WARN_ON_ONCE(1);
805 		break;
806 	}
807 }
808 
809 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
810 {
811 	u8 num_rec, last_rec_index, num_entries;
812 
813 	num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
814 	last_rec_index = num_rec - 1;
815 
816 	if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
817 		return false;
818 	if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
819 	    MLXSW_REG_RAUHTD_TYPE_IPV6)
820 		return true;
821 
822 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
823 								last_rec_index);
824 	if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
825 		return true;
826 	return false;
827 }
828 
829 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
830 {
831 	char *rauhtd_pl;
832 	u8 num_rec;
833 	int i, err;
834 
835 	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
836 	if (!rauhtd_pl)
837 		return -ENOMEM;
838 
839 	/* Make sure the neighbour's netdev isn't removed in the
840 	 * process.
841 	 */
842 	rtnl_lock();
843 	do {
844 		mlxsw_reg_rauhtd_pack(rauhtd_pl, MLXSW_REG_RAUHTD_TYPE_IPV4);
845 		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
846 				      rauhtd_pl);
847 		if (err) {
848 			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour talbe\n");
849 			break;
850 		}
851 		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
852 		for (i = 0; i < num_rec; i++)
853 			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
854 							  i);
855 	} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
856 	rtnl_unlock();
857 
858 	kfree(rauhtd_pl);
859 	return err;
860 }
861 
862 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
863 {
864 	struct mlxsw_sp_neigh_entry *neigh_entry;
865 
866 	/* Take RTNL mutex here to prevent lists from changes */
867 	rtnl_lock();
868 	list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
869 			    nexthop_neighs_list_node) {
870 		/* If this neigh have nexthops, make the kernel think this neigh
871 		 * is active regardless of the traffic.
872 		 */
873 		if (!list_empty(&neigh_entry->nexthop_list))
874 			neigh_event_send(neigh_entry->key.n, NULL);
875 	}
876 	rtnl_unlock();
877 }
878 
879 static void
880 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
881 {
882 	unsigned long interval = mlxsw_sp->router.neighs_update.interval;
883 
884 	mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw,
885 			       msecs_to_jiffies(interval));
886 }
887 
888 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
889 {
890 	struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
891 						 router.neighs_update.dw.work);
892 	int err;
893 
894 	err = mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp);
895 	if (err)
896 		dev_err(mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
897 
898 	mlxsw_sp_router_neighs_update_nh(mlxsw_sp);
899 
900 	mlxsw_sp_router_neighs_update_work_schedule(mlxsw_sp);
901 }
902 
903 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
904 {
905 	struct mlxsw_sp_neigh_entry *neigh_entry;
906 	struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
907 						 router.nexthop_probe_dw.work);
908 
909 	/* Iterate over nexthop neighbours, find those who are unresolved and
910 	 * send arp on them. This solves the chicken-egg problem when
911 	 * the nexthop wouldn't get offloaded until the neighbor is resolved
912 	 * but it wouldn't get resolved ever in case traffic is flowing in HW
913 	 * using different nexthop.
914 	 *
915 	 * Take RTNL mutex here to prevent lists from changes.
916 	 */
917 	rtnl_lock();
918 	list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
919 			    nexthop_neighs_list_node) {
920 		if (!(neigh_entry->key.n->nud_state & NUD_VALID) &&
921 		    !list_empty(&neigh_entry->nexthop_list))
922 			neigh_event_send(neigh_entry->key.n, NULL);
923 	}
924 	rtnl_unlock();
925 
926 	mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw,
927 			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
928 }
929 
930 static void
931 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
932 			      struct mlxsw_sp_neigh_entry *neigh_entry,
933 			      bool removing);
934 
935 static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work)
936 {
937 	struct mlxsw_sp_neigh_entry *neigh_entry =
938 		container_of(work, struct mlxsw_sp_neigh_entry, dw.work);
939 	struct neighbour *n = neigh_entry->key.n;
940 	struct mlxsw_sp_port *mlxsw_sp_port = neigh_entry->mlxsw_sp_port;
941 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
942 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
943 	struct net_device *dev;
944 	bool entry_connected;
945 	u8 nud_state, dead;
946 	bool updating;
947 	bool removing;
948 	bool adding;
949 	u32 dip;
950 	int err;
951 
952 	read_lock_bh(&n->lock);
953 	dip = ntohl(*((__be32 *) n->primary_key));
954 	memcpy(neigh_entry->ha, n->ha, sizeof(neigh_entry->ha));
955 	nud_state = n->nud_state;
956 	dead = n->dead;
957 	dev = n->dev;
958 	read_unlock_bh(&n->lock);
959 
960 	entry_connected = nud_state & NUD_VALID && !dead;
961 	adding = (!neigh_entry->offloaded) && entry_connected;
962 	updating = neigh_entry->offloaded && entry_connected;
963 	removing = neigh_entry->offloaded && !entry_connected;
964 
965 	if (adding || updating) {
966 		mlxsw_reg_rauht_pack4(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_ADD,
967 				      neigh_entry->rif,
968 				      neigh_entry->ha, dip);
969 		err = mlxsw_reg_write(mlxsw_sp->core,
970 				      MLXSW_REG(rauht), rauht_pl);
971 		if (err) {
972 			netdev_err(dev, "Could not add neigh %pI4h\n", &dip);
973 			neigh_entry->offloaded = false;
974 		} else {
975 			neigh_entry->offloaded = true;
976 		}
977 		mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, false);
978 	} else if (removing) {
979 		mlxsw_reg_rauht_pack4(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE,
980 				      neigh_entry->rif,
981 				      neigh_entry->ha, dip);
982 		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht),
983 				      rauht_pl);
984 		if (err) {
985 			netdev_err(dev, "Could not delete neigh %pI4h\n", &dip);
986 			neigh_entry->offloaded = true;
987 		} else {
988 			neigh_entry->offloaded = false;
989 		}
990 		mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, true);
991 	}
992 
993 	neigh_release(n);
994 	mlxsw_sp_port_dev_put(mlxsw_sp_port);
995 }
996 
997 int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
998 				   unsigned long event, void *ptr)
999 {
1000 	struct mlxsw_sp_neigh_entry *neigh_entry;
1001 	struct mlxsw_sp_port *mlxsw_sp_port;
1002 	struct mlxsw_sp *mlxsw_sp;
1003 	unsigned long interval;
1004 	struct net_device *dev;
1005 	struct neigh_parms *p;
1006 	struct neighbour *n;
1007 	u32 dip;
1008 
1009 	switch (event) {
1010 	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
1011 		p = ptr;
1012 
1013 		/* We don't care about changes in the default table. */
1014 		if (!p->dev || p->tbl != &arp_tbl)
1015 			return NOTIFY_DONE;
1016 
1017 		/* We are in atomic context and can't take RTNL mutex,
1018 		 * so use RCU variant to walk the device chain.
1019 		 */
1020 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
1021 		if (!mlxsw_sp_port)
1022 			return NOTIFY_DONE;
1023 
1024 		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
1025 		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
1026 		mlxsw_sp->router.neighs_update.interval = interval;
1027 
1028 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
1029 		break;
1030 	case NETEVENT_NEIGH_UPDATE:
1031 		n = ptr;
1032 		dev = n->dev;
1033 
1034 		if (n->tbl != &arp_tbl)
1035 			return NOTIFY_DONE;
1036 
1037 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(dev);
1038 		if (!mlxsw_sp_port)
1039 			return NOTIFY_DONE;
1040 
1041 		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
1042 		dip = ntohl(*((__be32 *) n->primary_key));
1043 		neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
1044 		if (WARN_ON(!neigh_entry)) {
1045 			mlxsw_sp_port_dev_put(mlxsw_sp_port);
1046 			return NOTIFY_DONE;
1047 		}
1048 		neigh_entry->mlxsw_sp_port = mlxsw_sp_port;
1049 
1050 		/* Take a reference to ensure the neighbour won't be
1051 		 * destructed until we drop the reference in delayed
1052 		 * work.
1053 		 */
1054 		neigh_clone(n);
1055 		if (!mlxsw_core_schedule_dw(&neigh_entry->dw, 0)) {
1056 			neigh_release(n);
1057 			mlxsw_sp_port_dev_put(mlxsw_sp_port);
1058 		}
1059 		break;
1060 	}
1061 
1062 	return NOTIFY_DONE;
1063 }
1064 
1065 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
1066 {
1067 	int err;
1068 
1069 	err = rhashtable_init(&mlxsw_sp->router.neigh_ht,
1070 			      &mlxsw_sp_neigh_ht_params);
1071 	if (err)
1072 		return err;
1073 
1074 	/* Initialize the polling interval according to the default
1075 	 * table.
1076 	 */
1077 	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
1078 
1079 	/* Create the delayed works for the activity_update */
1080 	INIT_DELAYED_WORK(&mlxsw_sp->router.neighs_update.dw,
1081 			  mlxsw_sp_router_neighs_update_work);
1082 	INIT_DELAYED_WORK(&mlxsw_sp->router.nexthop_probe_dw,
1083 			  mlxsw_sp_router_probe_unresolved_nexthops);
1084 	mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, 0);
1085 	mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, 0);
1086 	return 0;
1087 }
1088 
1089 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
1090 {
1091 	cancel_delayed_work_sync(&mlxsw_sp->router.neighs_update.dw);
1092 	cancel_delayed_work_sync(&mlxsw_sp->router.nexthop_probe_dw);
1093 	rhashtable_destroy(&mlxsw_sp->router.neigh_ht);
1094 }
1095 
1096 struct mlxsw_sp_nexthop {
1097 	struct list_head neigh_list_node; /* member of neigh entry list */
1098 	struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
1099 						* this belongs to
1100 						*/
1101 	u8 should_offload:1, /* set indicates this neigh is connected and
1102 			      * should be put to KVD linear area of this group.
1103 			      */
1104 	   offloaded:1, /* set in case the neigh is actually put into
1105 			 * KVD linear area of this group.
1106 			 */
1107 	   update:1; /* set indicates that MAC of this neigh should be
1108 		      * updated in HW
1109 		      */
1110 	struct mlxsw_sp_neigh_entry *neigh_entry;
1111 };
1112 
1113 struct mlxsw_sp_nexthop_group {
1114 	struct list_head list; /* node in mlxsw->router.nexthop_group_list */
1115 	struct list_head fib_list; /* list of fib entries that use this group */
1116 	u8 adj_index_valid:1;
1117 	u32 adj_index;
1118 	u16 ecmp_size;
1119 	u16 count;
1120 	struct mlxsw_sp_nexthop nexthops[0];
1121 };
1122 
1123 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
1124 					     struct mlxsw_sp_vr *vr,
1125 					     u32 adj_index, u16 ecmp_size,
1126 					     u32 new_adj_index,
1127 					     u16 new_ecmp_size)
1128 {
1129 	char raleu_pl[MLXSW_REG_RALEU_LEN];
1130 
1131 	mlxsw_reg_raleu_pack(raleu_pl,
1132 			     (enum mlxsw_reg_ralxx_protocol) vr->proto, vr->id,
1133 			     adj_index, ecmp_size, new_adj_index,
1134 			     new_ecmp_size);
1135 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
1136 }
1137 
1138 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
1139 					  struct mlxsw_sp_nexthop_group *nh_grp,
1140 					  u32 old_adj_index, u16 old_ecmp_size)
1141 {
1142 	struct mlxsw_sp_fib_entry *fib_entry;
1143 	struct mlxsw_sp_vr *vr = NULL;
1144 	int err;
1145 
1146 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
1147 		if (vr == fib_entry->vr)
1148 			continue;
1149 		vr = fib_entry->vr;
1150 		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr,
1151 							old_adj_index,
1152 							old_ecmp_size,
1153 							nh_grp->adj_index,
1154 							nh_grp->ecmp_size);
1155 		if (err)
1156 			return err;
1157 	}
1158 	return 0;
1159 }
1160 
1161 static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
1162 				       struct mlxsw_sp_nexthop *nh)
1163 {
1164 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
1165 	char ratr_pl[MLXSW_REG_RATR_LEN];
1166 
1167 	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
1168 			    true, adj_index, neigh_entry->rif);
1169 	mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
1170 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
1171 }
1172 
1173 static int
1174 mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp,
1175 				  struct mlxsw_sp_nexthop_group *nh_grp)
1176 {
1177 	u32 adj_index = nh_grp->adj_index; /* base */
1178 	struct mlxsw_sp_nexthop *nh;
1179 	int i;
1180 	int err;
1181 
1182 	for (i = 0; i < nh_grp->count; i++) {
1183 		nh = &nh_grp->nexthops[i];
1184 
1185 		if (!nh->should_offload) {
1186 			nh->offloaded = 0;
1187 			continue;
1188 		}
1189 
1190 		if (nh->update) {
1191 			err = mlxsw_sp_nexthop_mac_update(mlxsw_sp,
1192 							  adj_index, nh);
1193 			if (err)
1194 				return err;
1195 			nh->update = 0;
1196 			nh->offloaded = 1;
1197 		}
1198 		adj_index++;
1199 	}
1200 	return 0;
1201 }
1202 
1203 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1204 				     struct mlxsw_sp_fib_entry *fib_entry);
1205 
1206 static int
1207 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
1208 				    struct mlxsw_sp_nexthop_group *nh_grp)
1209 {
1210 	struct mlxsw_sp_fib_entry *fib_entry;
1211 	int err;
1212 
1213 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
1214 		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1215 		if (err)
1216 			return err;
1217 	}
1218 	return 0;
1219 }
1220 
1221 static void
1222 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
1223 			       struct mlxsw_sp_nexthop_group *nh_grp)
1224 {
1225 	struct mlxsw_sp_nexthop *nh;
1226 	bool offload_change = false;
1227 	u32 adj_index;
1228 	u16 ecmp_size = 0;
1229 	bool old_adj_index_valid;
1230 	u32 old_adj_index;
1231 	u16 old_ecmp_size;
1232 	int ret;
1233 	int i;
1234 	int err;
1235 
1236 	for (i = 0; i < nh_grp->count; i++) {
1237 		nh = &nh_grp->nexthops[i];
1238 
1239 		if (nh->should_offload ^ nh->offloaded) {
1240 			offload_change = true;
1241 			if (nh->should_offload)
1242 				nh->update = 1;
1243 		}
1244 		if (nh->should_offload)
1245 			ecmp_size++;
1246 	}
1247 	if (!offload_change) {
1248 		/* Nothing was added or removed, so no need to reallocate. Just
1249 		 * update MAC on existing adjacency indexes.
1250 		 */
1251 		err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp);
1252 		if (err) {
1253 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
1254 			goto set_trap;
1255 		}
1256 		return;
1257 	}
1258 	if (!ecmp_size)
1259 		/* No neigh of this group is connected so we just set
1260 		 * the trap and let everthing flow through kernel.
1261 		 */
1262 		goto set_trap;
1263 
1264 	ret = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size);
1265 	if (ret < 0) {
1266 		/* We ran out of KVD linear space, just set the
1267 		 * trap and let everything flow through kernel.
1268 		 */
1269 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
1270 		goto set_trap;
1271 	}
1272 	adj_index = ret;
1273 	old_adj_index_valid = nh_grp->adj_index_valid;
1274 	old_adj_index = nh_grp->adj_index;
1275 	old_ecmp_size = nh_grp->ecmp_size;
1276 	nh_grp->adj_index_valid = 1;
1277 	nh_grp->adj_index = adj_index;
1278 	nh_grp->ecmp_size = ecmp_size;
1279 	err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp);
1280 	if (err) {
1281 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
1282 		goto set_trap;
1283 	}
1284 
1285 	if (!old_adj_index_valid) {
1286 		/* The trap was set for fib entries, so we have to call
1287 		 * fib entry update to unset it and use adjacency index.
1288 		 */
1289 		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
1290 		if (err) {
1291 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
1292 			goto set_trap;
1293 		}
1294 		return;
1295 	}
1296 
1297 	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
1298 					     old_adj_index, old_ecmp_size);
1299 	mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
1300 	if (err) {
1301 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
1302 		goto set_trap;
1303 	}
1304 	return;
1305 
1306 set_trap:
1307 	old_adj_index_valid = nh_grp->adj_index_valid;
1308 	nh_grp->adj_index_valid = 0;
1309 	for (i = 0; i < nh_grp->count; i++) {
1310 		nh = &nh_grp->nexthops[i];
1311 		nh->offloaded = 0;
1312 	}
1313 	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
1314 	if (err)
1315 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
1316 	if (old_adj_index_valid)
1317 		mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
1318 }
1319 
1320 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
1321 					    bool removing)
1322 {
1323 	if (!removing && !nh->should_offload)
1324 		nh->should_offload = 1;
1325 	else if (removing && nh->offloaded)
1326 		nh->should_offload = 0;
1327 	nh->update = 1;
1328 }
1329 
1330 static void
1331 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
1332 			      struct mlxsw_sp_neigh_entry *neigh_entry,
1333 			      bool removing)
1334 {
1335 	struct mlxsw_sp_nexthop *nh;
1336 
1337 	/* Take RTNL mutex here to prevent lists from changes */
1338 	rtnl_lock();
1339 	list_for_each_entry(nh, &neigh_entry->nexthop_list,
1340 			    neigh_list_node) {
1341 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
1342 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
1343 	}
1344 	rtnl_unlock();
1345 }
1346 
1347 static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
1348 				 struct mlxsw_sp_nexthop_group *nh_grp,
1349 				 struct mlxsw_sp_nexthop *nh,
1350 				 struct fib_nh *fib_nh)
1351 {
1352 	struct mlxsw_sp_neigh_entry *neigh_entry;
1353 	struct net_device *dev = fib_nh->nh_dev;
1354 	struct neighbour *n;
1355 	u8 nud_state, dead;
1356 
1357 	/* Take a reference of neigh here ensuring that neigh would
1358 	 * not be detructed before the nexthop entry is finished.
1359 	 * The reference is taken either in neigh_lookup() or
1360 	 * in neith_create() in case n is not found.
1361 	 */
1362 	n = neigh_lookup(&arp_tbl, &fib_nh->nh_gw, dev);
1363 	if (!n) {
1364 		n = neigh_create(&arp_tbl, &fib_nh->nh_gw, dev);
1365 		if (IS_ERR(n))
1366 			return PTR_ERR(n);
1367 		neigh_event_send(n, NULL);
1368 	}
1369 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
1370 	if (!neigh_entry) {
1371 		neigh_release(n);
1372 		return -EINVAL;
1373 	}
1374 
1375 	/* If that is the first nexthop connected to that neigh, add to
1376 	 * nexthop_neighs_list
1377 	 */
1378 	if (list_empty(&neigh_entry->nexthop_list))
1379 		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
1380 			      &mlxsw_sp->router.nexthop_neighs_list);
1381 
1382 	nh->nh_grp = nh_grp;
1383 	nh->neigh_entry = neigh_entry;
1384 	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
1385 	read_lock_bh(&n->lock);
1386 	nud_state = n->nud_state;
1387 	dead = n->dead;
1388 	read_unlock_bh(&n->lock);
1389 	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
1390 
1391 	return 0;
1392 }
1393 
1394 static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp,
1395 				  struct mlxsw_sp_nexthop *nh)
1396 {
1397 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
1398 
1399 	__mlxsw_sp_nexthop_neigh_update(nh, true);
1400 	list_del(&nh->neigh_list_node);
1401 
1402 	/* If that is the last nexthop connected to that neigh, remove from
1403 	 * nexthop_neighs_list
1404 	 */
1405 	if (list_empty(&nh->neigh_entry->nexthop_list))
1406 		list_del(&nh->neigh_entry->nexthop_neighs_list_node);
1407 
1408 	neigh_release(neigh_entry->key.n);
1409 }
1410 
1411 static struct mlxsw_sp_nexthop_group *
1412 mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
1413 {
1414 	struct mlxsw_sp_nexthop_group *nh_grp;
1415 	struct mlxsw_sp_nexthop *nh;
1416 	struct fib_nh *fib_nh;
1417 	size_t alloc_size;
1418 	int i;
1419 	int err;
1420 
1421 	alloc_size = sizeof(*nh_grp) +
1422 		     fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
1423 	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
1424 	if (!nh_grp)
1425 		return ERR_PTR(-ENOMEM);
1426 	INIT_LIST_HEAD(&nh_grp->fib_list);
1427 	nh_grp->count = fi->fib_nhs;
1428 	for (i = 0; i < nh_grp->count; i++) {
1429 		nh = &nh_grp->nexthops[i];
1430 		fib_nh = &fi->fib_nh[i];
1431 		err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh);
1432 		if (err)
1433 			goto err_nexthop_init;
1434 	}
1435 	list_add_tail(&nh_grp->list, &mlxsw_sp->router.nexthop_group_list);
1436 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
1437 	return nh_grp;
1438 
1439 err_nexthop_init:
1440 	for (i--; i >= 0; i--)
1441 		mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
1442 	kfree(nh_grp);
1443 	return ERR_PTR(err);
1444 }
1445 
1446 static void
1447 mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp,
1448 			       struct mlxsw_sp_nexthop_group *nh_grp)
1449 {
1450 	struct mlxsw_sp_nexthop *nh;
1451 	int i;
1452 
1453 	list_del(&nh_grp->list);
1454 	for (i = 0; i < nh_grp->count; i++) {
1455 		nh = &nh_grp->nexthops[i];
1456 		mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
1457 	}
1458 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
1459 	WARN_ON_ONCE(nh_grp->adj_index_valid);
1460 	kfree(nh_grp);
1461 }
1462 
1463 static bool mlxsw_sp_nexthop_match(struct mlxsw_sp_nexthop *nh,
1464 				   struct fib_info *fi)
1465 {
1466 	int i;
1467 
1468 	for (i = 0; i < fi->fib_nhs; i++) {
1469 		struct fib_nh *fib_nh = &fi->fib_nh[i];
1470 		struct neighbour *n = nh->neigh_entry->key.n;
1471 
1472 		if (memcmp(n->primary_key, &fib_nh->nh_gw,
1473 			   sizeof(fib_nh->nh_gw)) == 0 &&
1474 		    n->dev == fib_nh->nh_dev)
1475 			return true;
1476 	}
1477 	return false;
1478 }
1479 
1480 static bool mlxsw_sp_nexthop_group_match(struct mlxsw_sp_nexthop_group *nh_grp,
1481 					 struct fib_info *fi)
1482 {
1483 	int i;
1484 
1485 	if (nh_grp->count != fi->fib_nhs)
1486 		return false;
1487 	for (i = 0; i < nh_grp->count; i++) {
1488 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
1489 
1490 		if (!mlxsw_sp_nexthop_match(nh, fi))
1491 			return false;
1492 	}
1493 	return true;
1494 }
1495 
1496 static struct mlxsw_sp_nexthop_group *
1497 mlxsw_sp_nexthop_group_find(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
1498 {
1499 	struct mlxsw_sp_nexthop_group *nh_grp;
1500 
1501 	list_for_each_entry(nh_grp, &mlxsw_sp->router.nexthop_group_list,
1502 			    list) {
1503 		if (mlxsw_sp_nexthop_group_match(nh_grp, fi))
1504 			return nh_grp;
1505 	}
1506 	return NULL;
1507 }
1508 
1509 static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp,
1510 				      struct mlxsw_sp_fib_entry *fib_entry,
1511 				      struct fib_info *fi)
1512 {
1513 	struct mlxsw_sp_nexthop_group *nh_grp;
1514 
1515 	nh_grp = mlxsw_sp_nexthop_group_find(mlxsw_sp, fi);
1516 	if (!nh_grp) {
1517 		nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi);
1518 		if (IS_ERR(nh_grp))
1519 			return PTR_ERR(nh_grp);
1520 	}
1521 	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
1522 	fib_entry->nh_group = nh_grp;
1523 	return 0;
1524 }
1525 
1526 static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp,
1527 				       struct mlxsw_sp_fib_entry *fib_entry)
1528 {
1529 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
1530 
1531 	list_del(&fib_entry->nexthop_group_node);
1532 	if (!list_empty(&nh_grp->fib_list))
1533 		return;
1534 	mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp);
1535 }
1536 
1537 static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp,
1538 					 struct mlxsw_sp_fib_entry *fib_entry,
1539 					 enum mlxsw_reg_ralue_op op)
1540 {
1541 	char ralue_pl[MLXSW_REG_RALUE_LEN];
1542 	u32 *p_dip = (u32 *) fib_entry->key.addr;
1543 	struct mlxsw_sp_vr *vr = fib_entry->vr;
1544 	enum mlxsw_reg_ralue_trap_action trap_action;
1545 	u16 trap_id = 0;
1546 	u32 adjacency_index = 0;
1547 	u16 ecmp_size = 0;
1548 
1549 	/* In case the nexthop group adjacency index is valid, use it
1550 	 * with provided ECMP size. Otherwise, setup trap and pass
1551 	 * traffic to kernel.
1552 	 */
1553 	if (fib_entry->nh_group->adj_index_valid) {
1554 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
1555 		adjacency_index = fib_entry->nh_group->adj_index;
1556 		ecmp_size = fib_entry->nh_group->ecmp_size;
1557 	} else {
1558 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
1559 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
1560 	}
1561 
1562 	mlxsw_reg_ralue_pack4(ralue_pl,
1563 			      (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
1564 			      vr->id, fib_entry->key.prefix_len, *p_dip);
1565 	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
1566 					adjacency_index, ecmp_size);
1567 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
1568 }
1569 
1570 static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp,
1571 					struct mlxsw_sp_fib_entry *fib_entry,
1572 					enum mlxsw_reg_ralue_op op)
1573 {
1574 	char ralue_pl[MLXSW_REG_RALUE_LEN];
1575 	u32 *p_dip = (u32 *) fib_entry->key.addr;
1576 	struct mlxsw_sp_vr *vr = fib_entry->vr;
1577 
1578 	mlxsw_reg_ralue_pack4(ralue_pl,
1579 			      (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
1580 			      vr->id, fib_entry->key.prefix_len, *p_dip);
1581 	mlxsw_reg_ralue_act_local_pack(ralue_pl,
1582 				       MLXSW_REG_RALUE_TRAP_ACTION_NOP, 0,
1583 				       fib_entry->rif);
1584 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
1585 }
1586 
1587 static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp,
1588 				       struct mlxsw_sp_fib_entry *fib_entry,
1589 				       enum mlxsw_reg_ralue_op op)
1590 {
1591 	char ralue_pl[MLXSW_REG_RALUE_LEN];
1592 	u32 *p_dip = (u32 *) fib_entry->key.addr;
1593 	struct mlxsw_sp_vr *vr = fib_entry->vr;
1594 
1595 	mlxsw_reg_ralue_pack4(ralue_pl,
1596 			      (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
1597 			      vr->id, fib_entry->key.prefix_len, *p_dip);
1598 	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
1599 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
1600 }
1601 
1602 static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp,
1603 				  struct mlxsw_sp_fib_entry *fib_entry,
1604 				  enum mlxsw_reg_ralue_op op)
1605 {
1606 	switch (fib_entry->type) {
1607 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
1608 		return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op);
1609 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
1610 		return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op);
1611 	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
1612 		return mlxsw_sp_fib_entry_op4_trap(mlxsw_sp, fib_entry, op);
1613 	}
1614 	return -EINVAL;
1615 }
1616 
1617 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
1618 				 struct mlxsw_sp_fib_entry *fib_entry,
1619 				 enum mlxsw_reg_ralue_op op)
1620 {
1621 	switch (fib_entry->vr->proto) {
1622 	case MLXSW_SP_L3_PROTO_IPV4:
1623 		return mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op);
1624 	case MLXSW_SP_L3_PROTO_IPV6:
1625 		return -EINVAL;
1626 	}
1627 	return -EINVAL;
1628 }
1629 
1630 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1631 				     struct mlxsw_sp_fib_entry *fib_entry)
1632 {
1633 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
1634 				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
1635 }
1636 
1637 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
1638 				  struct mlxsw_sp_fib_entry *fib_entry)
1639 {
1640 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
1641 				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
1642 }
1643 
1644 static int
1645 mlxsw_sp_router_fib4_entry_init(struct mlxsw_sp *mlxsw_sp,
1646 				const struct fib_entry_notifier_info *fen_info,
1647 				struct mlxsw_sp_fib_entry *fib_entry)
1648 {
1649 	struct fib_info *fi = fen_info->fi;
1650 	struct mlxsw_sp_rif *r = NULL;
1651 	int nhsel;
1652 	int err;
1653 
1654 	if (fen_info->type == RTN_LOCAL || fen_info->type == RTN_BROADCAST) {
1655 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1656 		return 0;
1657 	}
1658 	if (fen_info->type != RTN_UNICAST)
1659 		return -EINVAL;
1660 
1661 	for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
1662 		const struct fib_nh *nh = &fi->fib_nh[nhsel];
1663 
1664 		if (!nh->nh_dev)
1665 			continue;
1666 		r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, nh->nh_dev);
1667 		if (!r) {
1668 			/* In case router interface is not found for
1669 			 * at least one of the nexthops, that means
1670 			 * the nexthop points to some device unrelated
1671 			 * to us. Set trap and pass the packets for
1672 			 * this prefix to kernel.
1673 			 */
1674 			break;
1675 		}
1676 	}
1677 
1678 	if (!r) {
1679 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1680 		return 0;
1681 	}
1682 
1683 	if (fi->fib_scope != RT_SCOPE_UNIVERSE) {
1684 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
1685 		fib_entry->rif = r->rif;
1686 	} else {
1687 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
1688 		err = mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fi);
1689 		if (err)
1690 			return err;
1691 	}
1692 	fib_info_offload_inc(fen_info->fi);
1693 	return 0;
1694 }
1695 
1696 static void
1697 mlxsw_sp_router_fib4_entry_fini(struct mlxsw_sp *mlxsw_sp,
1698 				struct mlxsw_sp_fib_entry *fib_entry)
1699 {
1700 	if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1701 		fib_info_offload_dec(fib_entry->fi);
1702 	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_REMOTE)
1703 		mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry);
1704 }
1705 
1706 static struct mlxsw_sp_fib_entry *
1707 mlxsw_sp_fib_entry_get(struct mlxsw_sp *mlxsw_sp,
1708 		       const struct fib_entry_notifier_info *fen_info)
1709 {
1710 	struct mlxsw_sp_fib_entry *fib_entry;
1711 	struct fib_info *fi = fen_info->fi;
1712 	struct mlxsw_sp_vr *vr;
1713 	int err;
1714 
1715 	vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->dst_len, fen_info->tb_id,
1716 			     MLXSW_SP_L3_PROTO_IPV4);
1717 	if (IS_ERR(vr))
1718 		return ERR_CAST(vr);
1719 
1720 	fib_entry = mlxsw_sp_fib_entry_lookup(vr->fib, &fen_info->dst,
1721 					      sizeof(fen_info->dst),
1722 					      fen_info->dst_len, fi->fib_dev);
1723 	if (fib_entry) {
1724 		/* Already exists, just take a reference */
1725 		fib_entry->ref_count++;
1726 		return fib_entry;
1727 	}
1728 	fib_entry = mlxsw_sp_fib_entry_create(vr->fib, &fen_info->dst,
1729 					      sizeof(fen_info->dst),
1730 					      fen_info->dst_len, fi->fib_dev);
1731 	if (!fib_entry) {
1732 		err = -ENOMEM;
1733 		goto err_fib_entry_create;
1734 	}
1735 	fib_entry->vr = vr;
1736 	fib_entry->fi = fi;
1737 	fib_entry->ref_count = 1;
1738 
1739 	err = mlxsw_sp_router_fib4_entry_init(mlxsw_sp, fen_info, fib_entry);
1740 	if (err)
1741 		goto err_fib4_entry_init;
1742 
1743 	return fib_entry;
1744 
1745 err_fib4_entry_init:
1746 	mlxsw_sp_fib_entry_destroy(fib_entry);
1747 err_fib_entry_create:
1748 	mlxsw_sp_vr_put(mlxsw_sp, vr);
1749 
1750 	return ERR_PTR(err);
1751 }
1752 
1753 static struct mlxsw_sp_fib_entry *
1754 mlxsw_sp_fib_entry_find(struct mlxsw_sp *mlxsw_sp,
1755 			const struct fib_entry_notifier_info *fen_info)
1756 {
1757 	struct mlxsw_sp_vr *vr;
1758 
1759 	vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id,
1760 			      MLXSW_SP_L3_PROTO_IPV4);
1761 	if (!vr)
1762 		return NULL;
1763 
1764 	return mlxsw_sp_fib_entry_lookup(vr->fib, &fen_info->dst,
1765 					 sizeof(fen_info->dst),
1766 					 fen_info->dst_len,
1767 					 fen_info->fi->fib_dev);
1768 }
1769 
1770 static void mlxsw_sp_fib_entry_put(struct mlxsw_sp *mlxsw_sp,
1771 				   struct mlxsw_sp_fib_entry *fib_entry)
1772 {
1773 	struct mlxsw_sp_vr *vr = fib_entry->vr;
1774 
1775 	if (--fib_entry->ref_count == 0) {
1776 		mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry);
1777 		mlxsw_sp_fib_entry_destroy(fib_entry);
1778 	}
1779 	mlxsw_sp_vr_put(mlxsw_sp, vr);
1780 }
1781 
1782 static void mlxsw_sp_fib_entry_put_all(struct mlxsw_sp *mlxsw_sp,
1783 				       struct mlxsw_sp_fib_entry *fib_entry)
1784 {
1785 	unsigned int last_ref_count;
1786 
1787 	do {
1788 		last_ref_count = fib_entry->ref_count;
1789 		mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry);
1790 	} while (last_ref_count != 1);
1791 }
1792 
1793 static int mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
1794 				    struct fib_entry_notifier_info *fen_info)
1795 {
1796 	struct mlxsw_sp_fib_entry *fib_entry;
1797 	struct mlxsw_sp_vr *vr;
1798 	int err;
1799 
1800 	if (mlxsw_sp->router.aborted)
1801 		return 0;
1802 
1803 	fib_entry = mlxsw_sp_fib_entry_get(mlxsw_sp, fen_info);
1804 	if (IS_ERR(fib_entry)) {
1805 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB4 entry being added.\n");
1806 		return PTR_ERR(fib_entry);
1807 	}
1808 
1809 	if (fib_entry->ref_count != 1)
1810 		return 0;
1811 
1812 	vr = fib_entry->vr;
1813 	err = mlxsw_sp_fib_entry_insert(vr->fib, fib_entry);
1814 	if (err) {
1815 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to insert FIB4 entry being added.\n");
1816 		goto err_fib_entry_insert;
1817 	}
1818 	err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1819 	if (err)
1820 		goto err_fib_entry_add;
1821 	return 0;
1822 
1823 err_fib_entry_add:
1824 	mlxsw_sp_fib_entry_remove(vr->fib, fib_entry);
1825 err_fib_entry_insert:
1826 	mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry);
1827 	return err;
1828 }
1829 
1830 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
1831 				     struct fib_entry_notifier_info *fen_info)
1832 {
1833 	struct mlxsw_sp_fib_entry *fib_entry;
1834 
1835 	if (mlxsw_sp->router.aborted)
1836 		return;
1837 
1838 	fib_entry = mlxsw_sp_fib_entry_find(mlxsw_sp, fen_info);
1839 	if (!fib_entry)
1840 		return;
1841 
1842 	if (fib_entry->ref_count == 1) {
1843 		mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
1844 		mlxsw_sp_fib_entry_remove(fib_entry->vr->fib, fib_entry);
1845 	}
1846 
1847 	mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry);
1848 }
1849 
1850 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
1851 {
1852 	char ralta_pl[MLXSW_REG_RALTA_LEN];
1853 	char ralst_pl[MLXSW_REG_RALST_LEN];
1854 	char raltb_pl[MLXSW_REG_RALTB_LEN];
1855 	char ralue_pl[MLXSW_REG_RALUE_LEN];
1856 	int err;
1857 
1858 	mlxsw_reg_ralta_pack(ralta_pl, true, MLXSW_REG_RALXX_PROTOCOL_IPV4,
1859 			     MLXSW_SP_LPM_TREE_MIN);
1860 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
1861 	if (err)
1862 		return err;
1863 
1864 	mlxsw_reg_ralst_pack(ralst_pl, 0xff, MLXSW_SP_LPM_TREE_MIN);
1865 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
1866 	if (err)
1867 		return err;
1868 
1869 	mlxsw_reg_raltb_pack(raltb_pl, 0, MLXSW_REG_RALXX_PROTOCOL_IPV4,
1870 			     MLXSW_SP_LPM_TREE_MIN);
1871 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
1872 	if (err)
1873 		return err;
1874 
1875 	mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4,
1876 			      MLXSW_REG_RALUE_OP_WRITE_WRITE, 0, 0, 0);
1877 	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
1878 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
1879 }
1880 
1881 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
1882 {
1883 	struct mlxsw_sp_fib_entry *fib_entry;
1884 	struct mlxsw_sp_fib_entry *tmp;
1885 	struct mlxsw_sp_vr *vr;
1886 	int i;
1887 
1888 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
1889 		vr = &mlxsw_sp->router.vrs[i];
1890 
1891 		if (!vr->used)
1892 			continue;
1893 
1894 		list_for_each_entry_safe(fib_entry, tmp,
1895 					 &vr->fib->entry_list, list) {
1896 			bool do_break = &tmp->list == &vr->fib->entry_list;
1897 
1898 			mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
1899 			mlxsw_sp_fib_entry_remove(fib_entry->vr->fib,
1900 						  fib_entry);
1901 			mlxsw_sp_fib_entry_put_all(mlxsw_sp, fib_entry);
1902 			if (do_break)
1903 				break;
1904 		}
1905 	}
1906 }
1907 
1908 static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp)
1909 {
1910 	int err;
1911 
1912 	if (mlxsw_sp->router.aborted)
1913 		return;
1914 	dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
1915 	mlxsw_sp_router_fib_flush(mlxsw_sp);
1916 	mlxsw_sp->router.aborted = true;
1917 	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
1918 	if (err)
1919 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
1920 }
1921 
1922 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
1923 {
1924 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
1925 	u64 max_rifs;
1926 	int err;
1927 
1928 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
1929 		return -EIO;
1930 
1931 	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
1932 	mlxsw_sp->rifs = kcalloc(max_rifs, sizeof(struct mlxsw_sp_rif *),
1933 				 GFP_KERNEL);
1934 	if (!mlxsw_sp->rifs)
1935 		return -ENOMEM;
1936 
1937 	mlxsw_reg_rgcr_pack(rgcr_pl, true);
1938 	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
1939 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
1940 	if (err)
1941 		goto err_rgcr_fail;
1942 
1943 	return 0;
1944 
1945 err_rgcr_fail:
1946 	kfree(mlxsw_sp->rifs);
1947 	return err;
1948 }
1949 
1950 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
1951 {
1952 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
1953 	int i;
1954 
1955 	mlxsw_reg_rgcr_pack(rgcr_pl, false);
1956 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
1957 
1958 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
1959 		WARN_ON_ONCE(mlxsw_sp->rifs[i]);
1960 
1961 	kfree(mlxsw_sp->rifs);
1962 }
1963 
1964 struct mlxsw_sp_fib_event_work {
1965 	struct delayed_work dw;
1966 	struct fib_entry_notifier_info fen_info;
1967 	struct mlxsw_sp *mlxsw_sp;
1968 	unsigned long event;
1969 };
1970 
1971 static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
1972 {
1973 	struct mlxsw_sp_fib_event_work *fib_work =
1974 		container_of(work, struct mlxsw_sp_fib_event_work, dw.work);
1975 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
1976 	int err;
1977 
1978 	/* Protect internal structures from changes */
1979 	rtnl_lock();
1980 	switch (fib_work->event) {
1981 	case FIB_EVENT_ENTRY_ADD:
1982 		err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info);
1983 		if (err)
1984 			mlxsw_sp_router_fib4_abort(mlxsw_sp);
1985 		fib_info_put(fib_work->fen_info.fi);
1986 		break;
1987 	case FIB_EVENT_ENTRY_DEL:
1988 		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
1989 		fib_info_put(fib_work->fen_info.fi);
1990 		break;
1991 	case FIB_EVENT_RULE_ADD: /* fall through */
1992 	case FIB_EVENT_RULE_DEL:
1993 		mlxsw_sp_router_fib4_abort(mlxsw_sp);
1994 		break;
1995 	}
1996 	rtnl_unlock();
1997 	kfree(fib_work);
1998 }
1999 
2000 /* Called with rcu_read_lock() */
2001 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
2002 				     unsigned long event, void *ptr)
2003 {
2004 	struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
2005 	struct mlxsw_sp_fib_event_work *fib_work;
2006 	struct fib_notifier_info *info = ptr;
2007 
2008 	if (!net_eq(info->net, &init_net))
2009 		return NOTIFY_DONE;
2010 
2011 	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
2012 	if (WARN_ON(!fib_work))
2013 		return NOTIFY_BAD;
2014 
2015 	INIT_DELAYED_WORK(&fib_work->dw, mlxsw_sp_router_fib_event_work);
2016 	fib_work->mlxsw_sp = mlxsw_sp;
2017 	fib_work->event = event;
2018 
2019 	switch (event) {
2020 	case FIB_EVENT_ENTRY_ADD: /* fall through */
2021 	case FIB_EVENT_ENTRY_DEL:
2022 		memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info));
2023 		/* Take referece on fib_info to prevent it from being
2024 		 * freed while work is queued. Release it afterwards.
2025 		 */
2026 		fib_info_hold(fib_work->fen_info.fi);
2027 		break;
2028 	}
2029 
2030 	mlxsw_core_schedule_odw(&fib_work->dw, 0);
2031 
2032 	return NOTIFY_DONE;
2033 }
2034 
2035 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
2036 {
2037 	struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
2038 
2039 	/* Flush pending FIB notifications and then flush the device's
2040 	 * table before requesting another dump. The FIB notification
2041 	 * block is unregistered, so no need to take RTNL.
2042 	 */
2043 	mlxsw_core_flush_owq();
2044 	mlxsw_sp_router_fib_flush(mlxsw_sp);
2045 }
2046 
2047 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
2048 {
2049 	int err;
2050 
2051 	INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list);
2052 	INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_group_list);
2053 	err = __mlxsw_sp_router_init(mlxsw_sp);
2054 	if (err)
2055 		return err;
2056 
2057 	mlxsw_sp_lpm_init(mlxsw_sp);
2058 	err = mlxsw_sp_vrs_init(mlxsw_sp);
2059 	if (err)
2060 		goto err_vrs_init;
2061 
2062 	err = mlxsw_sp_neigh_init(mlxsw_sp);
2063 	if (err)
2064 		goto err_neigh_init;
2065 
2066 	mlxsw_sp->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
2067 	err = register_fib_notifier(&mlxsw_sp->fib_nb,
2068 				    mlxsw_sp_router_fib_dump_flush);
2069 	if (err)
2070 		goto err_register_fib_notifier;
2071 
2072 	return 0;
2073 
2074 err_register_fib_notifier:
2075 	mlxsw_sp_neigh_fini(mlxsw_sp);
2076 err_neigh_init:
2077 	mlxsw_sp_vrs_fini(mlxsw_sp);
2078 err_vrs_init:
2079 	__mlxsw_sp_router_fini(mlxsw_sp);
2080 	return err;
2081 }
2082 
2083 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
2084 {
2085 	unregister_fib_notifier(&mlxsw_sp->fib_nb);
2086 	mlxsw_sp_neigh_fini(mlxsw_sp);
2087 	mlxsw_sp_vrs_fini(mlxsw_sp);
2088 	__mlxsw_sp_router_fini(mlxsw_sp);
2089 }
2090