1 /*
2  * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3  * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
4  * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
5  * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
6  * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the names of the copyright holders nor the names of its
17  *    contributors may be used to endorse or promote products derived from
18  *    this software without specific prior written permission.
19  *
20  * Alternatively, this software may be distributed under the terms of the
21  * GNU General Public License ("GPL") version 2 as published by the Free
22  * Software Foundation.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 
37 #include <linux/kernel.h>
38 #include <linux/types.h>
39 #include <linux/rhashtable.h>
40 #include <linux/bitops.h>
41 #include <linux/in6.h>
42 #include <linux/notifier.h>
43 #include <linux/inetdevice.h>
44 #include <linux/netdevice.h>
45 #include <net/netevent.h>
46 #include <net/neighbour.h>
47 #include <net/arp.h>
48 #include <net/ip_fib.h>
49 #include <net/fib_rules.h>
50 #include <net/l3mdev.h>
51 
52 #include "spectrum.h"
53 #include "core.h"
54 #include "reg.h"
55 #include "spectrum_cnt.h"
56 #include "spectrum_dpipe.h"
57 #include "spectrum_router.h"
58 
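/* Router interface (RIF). Ties a netdev and its FID to a RIF index in the
 * device and tracks the nexthops and neighbour entries that depend on it,
 * along with optional per-direction packet counters.
 */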
59 struct mlxsw_sp_rif {
60 	struct list_head nexthop_list;
61 	struct list_head neigh_list;
62 	struct net_device *dev;
63 	struct mlxsw_sp_fid *f;
64 	unsigned char addr[ETH_ALEN];
65 	int mtu;
66 	u16 rif_index;
67 	u16 vr_id;
68 	unsigned int counter_ingress;
69 	bool counter_ingress_valid;
70 	unsigned int counter_egress;
71 	bool counter_egress_valid;
72 };
73 
74 static unsigned int *
75 mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
76 			   enum mlxsw_sp_rif_counter_dir dir)
77 {
78 	switch (dir) {
79 	case MLXSW_SP_RIF_COUNTER_EGRESS:
80 		return &rif->counter_egress;
81 	case MLXSW_SP_RIF_COUNTER_INGRESS:
82 		return &rif->counter_ingress;
83 	}
84 	return NULL;
85 }
86 
87 static bool
88 mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
89 			       enum mlxsw_sp_rif_counter_dir dir)
90 {
91 	switch (dir) {
92 	case MLXSW_SP_RIF_COUNTER_EGRESS:
93 		return rif->counter_egress_valid;
94 	case MLXSW_SP_RIF_COUNTER_INGRESS:
95 		return rif->counter_ingress_valid;
96 	}
97 	return false;
98 }
99 
100 static void
101 mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
102 			       enum mlxsw_sp_rif_counter_dir dir,
103 			       bool valid)
104 {
105 	switch (dir) {
106 	case MLXSW_SP_RIF_COUNTER_EGRESS:
107 		rif->counter_egress_valid = valid;
108 		break;
109 	case MLXSW_SP_RIF_COUNTER_INGRESS:
110 		rif->counter_ingress_valid = valid;
111 		break;
112 	}
113 }
114 
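/* Bind ('enable') or unbind a counter to the RIF in the given direction.
 * This is a read-modify-write of the RITR register: the current RIF entry
 * is queried first so that the counter update does not clobber the rest
 * of the interface configuration.
 */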
115 static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
116 				     unsigned int counter_index, bool enable,
117 				     enum mlxsw_sp_rif_counter_dir dir)
118 {
119 	char ritr_pl[MLXSW_REG_RITR_LEN];
120 	bool is_egress = false;
121 	int err;
122 
123 	if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
124 		is_egress = true;
125 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
126 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
127 	if (err)
128 		return err;
129 
130 	mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
131 				    is_egress);
132 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
133 }
134 
135 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
136 				   struct mlxsw_sp_rif *rif,
137 				   enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
138 {
139 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
140 	unsigned int *p_counter_index;
141 	bool valid;
142 	int err;
143 
144 	valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
145 	if (!valid)
146 		return -EINVAL;
147 
148 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
149 	if (!p_counter_index)
150 		return -EINVAL;
151 	mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
152 			     MLXSW_REG_RICNT_OPCODE_NOP);
153 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
154 	if (err)
155 		return err;
156 	*cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
157 	return 0;
158 }
159 
160 static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
161 				      unsigned int counter_index)
162 {
163 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
164 
165 	mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
166 			     MLXSW_REG_RICNT_OPCODE_CLEAR);
167 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
168 }
169 
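/* Allocate a counter from the RIF counter sub-pool, clear its initial
 * value and bind it to the RIF. On failure the counter is returned to
 * the pool.
 */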
170 int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
171 			       struct mlxsw_sp_rif *rif,
172 			       enum mlxsw_sp_rif_counter_dir dir)
173 {
174 	unsigned int *p_counter_index;
175 	int err;
176 
177 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
178 	if (!p_counter_index)
179 		return -EINVAL;
180 	err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
181 				     p_counter_index);
182 	if (err)
183 		return err;
184 
185 	err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
186 	if (err)
187 		goto err_counter_clear;
188 
189 	err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
190 					*p_counter_index, true, dir);
191 	if (err)
192 		goto err_counter_edit;
193 	mlxsw_sp_rif_counter_valid_set(rif, dir, true);
194 	return 0;
195 
196 err_counter_edit:
197 err_counter_clear:
198 	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
199 			      *p_counter_index);
200 	return err;
201 }
202 
203 void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
204 			       struct mlxsw_sp_rif *rif,
205 			       enum mlxsw_sp_rif_counter_dir dir)
206 {
207 	unsigned int *p_counter_index;
208 
209 	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
210 		return;
211 
212 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
213 	if (WARN_ON(!p_counter_index))
214 		return;
215 	mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
216 				  *p_counter_index, false, dir);
217 	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
218 			      *p_counter_index);
219 	mlxsw_sp_rif_counter_valid_set(rif, dir, false);
220 }
221 
222 static struct mlxsw_sp_rif *
223 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
224 			 const struct net_device *dev);
225 
226 #define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
227 	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
228 
229 static bool
230 mlxsw_sp_prefix_usage_subset(struct mlxsw_sp_prefix_usage *prefix_usage1,
231 			     struct mlxsw_sp_prefix_usage *prefix_usage2)
232 {
233 	unsigned char prefix;
234 
235 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage1) {
236 		if (!test_bit(prefix, prefix_usage2->b))
237 			return false;
238 	}
239 	return true;
240 }
241 
242 static bool
243 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
244 			 struct mlxsw_sp_prefix_usage *prefix_usage2)
245 {
246 	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
247 }
248 
249 static bool
250 mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage)
251 {
	struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 }};
253 
254 	return mlxsw_sp_prefix_usage_eq(prefix_usage, &prefix_usage_none);
255 }
256 
257 static void
258 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
259 			  struct mlxsw_sp_prefix_usage *prefix_usage2)
260 {
261 	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
262 }
263 
264 static void
265 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
266 			  unsigned char prefix_len)
267 {
268 	set_bit(prefix_len, prefix_usage->b);
269 }
270 
271 static void
272 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
273 			    unsigned char prefix_len)
274 {
275 	clear_bit(prefix_len, prefix_usage->b);
276 }
277 
278 struct mlxsw_sp_fib_key {
279 	unsigned char addr[sizeof(struct in6_addr)];
280 	unsigned char prefix_len;
281 };
282 
283 enum mlxsw_sp_fib_entry_type {
284 	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
285 	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
286 	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
287 };
288 
289 struct mlxsw_sp_nexthop_group;
290 
291 struct mlxsw_sp_fib_node {
292 	struct list_head entry_list;
293 	struct list_head list;
294 	struct rhash_head ht_node;
295 	struct mlxsw_sp_fib *fib;
296 	struct mlxsw_sp_fib_key key;
297 };
298 
299 struct mlxsw_sp_fib_entry_params {
300 	u32 tb_id;
301 	u32 prio;
302 	u8 tos;
303 	u8 type;
304 };
305 
306 struct mlxsw_sp_fib_entry {
307 	struct list_head list;
308 	struct mlxsw_sp_fib_node *fib_node;
309 	enum mlxsw_sp_fib_entry_type type;
310 	struct list_head nexthop_group_node;
311 	struct mlxsw_sp_nexthop_group *nh_group;
312 	struct mlxsw_sp_fib_entry_params params;
313 	bool offloaded;
314 };
315 
316 struct mlxsw_sp_fib {
317 	struct rhashtable ht;
318 	struct list_head node_list;
319 	struct mlxsw_sp_vr *vr;
320 	struct mlxsw_sp_lpm_tree *lpm_tree;
321 	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
322 	struct mlxsw_sp_prefix_usage prefix_usage;
323 	enum mlxsw_sp_l3proto proto;
324 };
325 
326 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
327 
328 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp_vr *vr,
329 						enum mlxsw_sp_l3proto proto)
330 {
331 	struct mlxsw_sp_fib *fib;
332 	int err;
333 
334 	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
335 	if (!fib)
336 		return ERR_PTR(-ENOMEM);
337 	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
338 	if (err)
339 		goto err_rhashtable_init;
340 	INIT_LIST_HEAD(&fib->node_list);
341 	fib->proto = proto;
342 	fib->vr = vr;
343 	return fib;
344 
345 err_rhashtable_init:
346 	kfree(fib);
347 	return ERR_PTR(err);
348 }
349 
350 static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib)
351 {
352 	WARN_ON(!list_empty(&fib->node_list));
353 	WARN_ON(fib->lpm_tree);
354 	rhashtable_destroy(&fib->ht);
355 	kfree(fib);
356 }
357 
358 static struct mlxsw_sp_lpm_tree *
359 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
360 {
	struct mlxsw_sp_lpm_tree *lpm_tree;
362 	int i;
363 
364 	for (i = 0; i < mlxsw_sp->router.lpm.tree_count; i++) {
365 		lpm_tree = &mlxsw_sp->router.lpm.trees[i];
366 		if (lpm_tree->ref_count == 0)
367 			return lpm_tree;
368 	}
369 	return NULL;
370 }
371 
372 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
373 				   struct mlxsw_sp_lpm_tree *lpm_tree)
374 {
375 	char ralta_pl[MLXSW_REG_RALTA_LEN];
376 
377 	mlxsw_reg_ralta_pack(ralta_pl, true,
378 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
379 			     lpm_tree->id);
380 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
381 }
382 
383 static int mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
384 				  struct mlxsw_sp_lpm_tree *lpm_tree)
385 {
386 	char ralta_pl[MLXSW_REG_RALTA_LEN];
387 
388 	mlxsw_reg_ralta_pack(ralta_pl, false,
389 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
390 			     lpm_tree->id);
391 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
392 }
393 
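/* Program the tree's structure via RALST. The root bin is the longest
 * used prefix length; every other used prefix length (except 0) is linked
 * to the next shorter one through its left child pointer, while right
 * children are left empty.
 */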
394 static int
395 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
396 				  struct mlxsw_sp_prefix_usage *prefix_usage,
397 				  struct mlxsw_sp_lpm_tree *lpm_tree)
398 {
399 	char ralst_pl[MLXSW_REG_RALST_LEN];
400 	u8 root_bin = 0;
401 	u8 prefix;
402 	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
403 
404 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
405 		root_bin = prefix;
406 
407 	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
408 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
409 		if (prefix == 0)
410 			continue;
411 		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
412 					 MLXSW_REG_RALST_BIN_NO_CHILD);
413 		last_prefix = prefix;
414 	}
415 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
416 }
417 
418 static struct mlxsw_sp_lpm_tree *
419 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
420 			 struct mlxsw_sp_prefix_usage *prefix_usage,
421 			 enum mlxsw_sp_l3proto proto)
422 {
423 	struct mlxsw_sp_lpm_tree *lpm_tree;
424 	int err;
425 
426 	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
427 	if (!lpm_tree)
428 		return ERR_PTR(-EBUSY);
429 	lpm_tree->proto = proto;
430 	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
431 	if (err)
432 		return ERR_PTR(err);
433 
434 	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
435 						lpm_tree);
436 	if (err)
437 		goto err_left_struct_set;
438 	memcpy(&lpm_tree->prefix_usage, prefix_usage,
439 	       sizeof(lpm_tree->prefix_usage));
440 	return lpm_tree;
441 
442 err_left_struct_set:
443 	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
444 	return ERR_PTR(err);
445 }
446 
447 static int mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
448 				     struct mlxsw_sp_lpm_tree *lpm_tree)
449 {
450 	return mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
451 }
452 
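/* Get a reference to an LPM tree matching the given prefix usage and
 * protocol. An existing tree is shared if its prefix usage matches
 * exactly; otherwise a new tree is created from the unused pool.
 */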
453 static struct mlxsw_sp_lpm_tree *
454 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
455 		      struct mlxsw_sp_prefix_usage *prefix_usage,
456 		      enum mlxsw_sp_l3proto proto)
457 {
458 	struct mlxsw_sp_lpm_tree *lpm_tree;
459 	int i;
460 
461 	for (i = 0; i < mlxsw_sp->router.lpm.tree_count; i++) {
462 		lpm_tree = &mlxsw_sp->router.lpm.trees[i];
463 		if (lpm_tree->ref_count != 0 &&
464 		    lpm_tree->proto == proto &&
465 		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
466 					     prefix_usage))
467 			goto inc_ref_count;
468 	}
469 	lpm_tree = mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage,
470 					    proto);
471 	if (IS_ERR(lpm_tree))
472 		return lpm_tree;
473 
474 inc_ref_count:
475 	lpm_tree->ref_count++;
476 	return lpm_tree;
477 }
478 
479 static int mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
480 				 struct mlxsw_sp_lpm_tree *lpm_tree)
481 {
482 	if (--lpm_tree->ref_count == 0)
483 		return mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
484 	return 0;
485 }
486 
487 #define MLXSW_SP_LPM_TREE_MIN 2 /* trees 0 and 1 are reserved */
488 
489 static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
490 {
491 	struct mlxsw_sp_lpm_tree *lpm_tree;
492 	u64 max_trees;
493 	int i;
494 
495 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
496 		return -EIO;
497 
498 	max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
499 	mlxsw_sp->router.lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
500 	mlxsw_sp->router.lpm.trees = kcalloc(mlxsw_sp->router.lpm.tree_count,
501 					     sizeof(struct mlxsw_sp_lpm_tree),
502 					     GFP_KERNEL);
503 	if (!mlxsw_sp->router.lpm.trees)
504 		return -ENOMEM;
505 
506 	for (i = 0; i < mlxsw_sp->router.lpm.tree_count; i++) {
507 		lpm_tree = &mlxsw_sp->router.lpm.trees[i];
508 		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
509 	}
510 
511 	return 0;
512 }
513 
514 static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
515 {
516 	kfree(mlxsw_sp->router.lpm.trees);
517 }
518 
519 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
520 {
521 	return !!vr->fib4;
522 }
523 
524 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
525 {
526 	struct mlxsw_sp_vr *vr;
527 	int i;
528 
529 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
530 		vr = &mlxsw_sp->router.vrs[i];
531 		if (!mlxsw_sp_vr_is_used(vr))
532 			return vr;
533 	}
534 	return NULL;
535 }
536 
537 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
538 				     const struct mlxsw_sp_fib *fib)
539 {
540 	char raltb_pl[MLXSW_REG_RALTB_LEN];
541 
542 	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
543 			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
544 			     fib->lpm_tree->id);
545 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
546 }
547 
548 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
549 				       const struct mlxsw_sp_fib *fib)
550 {
551 	char raltb_pl[MLXSW_REG_RALTB_LEN];
552 
	/* Bind to tree 0, which is the default */
554 	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
555 			     (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
556 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
557 }
558 
559 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
560 {
	/* For our purposes, squash the main and local tables into one */
562 	if (tb_id == RT_TABLE_LOCAL)
563 		tb_id = RT_TABLE_MAIN;
564 	return tb_id;
565 }
566 
567 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
568 					    u32 tb_id)
569 {
570 	struct mlxsw_sp_vr *vr;
571 	int i;
572 
573 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
574 
575 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
576 		vr = &mlxsw_sp->router.vrs[i];
577 		if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
578 			return vr;
579 	}
580 	return NULL;
581 }
582 
583 static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
584 					    enum mlxsw_sp_l3proto proto)
585 {
586 	switch (proto) {
587 	case MLXSW_SP_L3_PROTO_IPV4:
588 		return vr->fib4;
589 	case MLXSW_SP_L3_PROTO_IPV6:
590 		BUG_ON(1);
591 	}
592 	return NULL;
593 }
594 
595 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
596 					      u32 tb_id)
597 {
598 	struct mlxsw_sp_vr *vr;
599 
600 	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
601 	if (!vr)
602 		return ERR_PTR(-EBUSY);
603 	vr->fib4 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV4);
604 	if (IS_ERR(vr->fib4))
605 		return ERR_CAST(vr->fib4);
606 	vr->tb_id = tb_id;
607 	return vr;
608 }
609 
610 static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr)
611 {
612 	mlxsw_sp_fib_destroy(vr->fib4);
613 	vr->fib4 = NULL;
614 }
615 
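/* Make sure the FIB's LPM tree can accommodate the required prefix usage.
 * If not, migrate the virtual router to a matching tree. The new tree is
 * bound before the old one is released, so lookups are never performed
 * against an unbound tree in the meantime.
 */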
616 static int
617 mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib *fib,
618 			   struct mlxsw_sp_prefix_usage *req_prefix_usage)
619 {
620 	struct mlxsw_sp_lpm_tree *lpm_tree = fib->lpm_tree;
621 	struct mlxsw_sp_lpm_tree *new_tree;
622 	int err;
623 
624 	if (mlxsw_sp_prefix_usage_eq(req_prefix_usage, &lpm_tree->prefix_usage))
625 		return 0;
626 
627 	new_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage,
628 					 fib->proto);
629 	if (IS_ERR(new_tree)) {
		/* We failed to get a tree according to the required
		 * prefix usage. However, the current tree might still be
		 * good for us if our requirement is a subset of the
		 * prefixes used in the tree.
		 */
635 		if (mlxsw_sp_prefix_usage_subset(req_prefix_usage,
636 						 &lpm_tree->prefix_usage))
637 			return 0;
638 		return PTR_ERR(new_tree);
639 	}
640 
641 	/* Prevent packet loss by overwriting existing binding */
642 	fib->lpm_tree = new_tree;
643 	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib);
644 	if (err)
645 		goto err_tree_bind;
646 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
647 
648 	return 0;
649 
650 err_tree_bind:
651 	fib->lpm_tree = lpm_tree;
652 	mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
653 	return err;
654 }
655 
656 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id)
657 {
658 	struct mlxsw_sp_vr *vr;
659 
660 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
661 	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
662 	if (!vr)
663 		vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id);
664 	return vr;
665 }
666 
667 static void mlxsw_sp_vr_put(struct mlxsw_sp_vr *vr)
668 {
669 	if (!vr->rif_count && list_empty(&vr->fib4->node_list))
670 		mlxsw_sp_vr_destroy(vr);
671 }
672 
673 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
674 {
675 	struct mlxsw_sp_vr *vr;
676 	u64 max_vrs;
677 	int i;
678 
679 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
680 		return -EIO;
681 
682 	max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
683 	mlxsw_sp->router.vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
684 				       GFP_KERNEL);
685 	if (!mlxsw_sp->router.vrs)
686 		return -ENOMEM;
687 
688 	for (i = 0; i < max_vrs; i++) {
689 		vr = &mlxsw_sp->router.vrs[i];
690 		vr->id = i;
691 	}
692 
693 	return 0;
694 }
695 
696 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
697 
698 static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
699 {
700 	/* At this stage we're guaranteed not to have new incoming
701 	 * FIB notifications and the work queue is free from FIBs
702 	 * sitting on top of mlxsw netdevs. However, we can still
703 	 * have other FIBs queued. Flush the queue before flushing
704 	 * the device's tables. No need for locks, as we're the only
705 	 * writer.
706 	 */
707 	mlxsw_core_flush_owq();
708 	mlxsw_sp_router_fib_flush(mlxsw_sp);
709 	kfree(mlxsw_sp->router.vrs);
710 }
711 
712 struct mlxsw_sp_neigh_key {
713 	struct neighbour *n;
714 };
715 
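/* A neigh entry mirrors a kernel neighbour that is reachable through one
 * of our RIFs. 'connected' tracks whether the entry is currently
 * programmed into the device's host table.
 */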
716 struct mlxsw_sp_neigh_entry {
717 	struct list_head rif_list_node;
718 	struct rhash_head ht_node;
719 	struct mlxsw_sp_neigh_key key;
720 	u16 rif;
721 	bool connected;
722 	unsigned char ha[ETH_ALEN];
723 	struct list_head nexthop_list; /* list of nexthops using
724 					* this neigh entry
725 					*/
726 	struct list_head nexthop_neighs_list_node;
727 };
728 
729 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
730 	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
731 	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
732 	.key_len = sizeof(struct mlxsw_sp_neigh_key),
733 };
734 
735 static struct mlxsw_sp_neigh_entry *
736 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
737 			   u16 rif)
738 {
739 	struct mlxsw_sp_neigh_entry *neigh_entry;
740 
741 	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
742 	if (!neigh_entry)
743 		return NULL;
744 
745 	neigh_entry->key.n = n;
746 	neigh_entry->rif = rif;
747 	INIT_LIST_HEAD(&neigh_entry->nexthop_list);
748 
749 	return neigh_entry;
750 }
751 
752 static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
753 {
754 	kfree(neigh_entry);
755 }
756 
757 static int
758 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
759 			    struct mlxsw_sp_neigh_entry *neigh_entry)
760 {
761 	return rhashtable_insert_fast(&mlxsw_sp->router.neigh_ht,
762 				      &neigh_entry->ht_node,
763 				      mlxsw_sp_neigh_ht_params);
764 }
765 
766 static void
767 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
768 			    struct mlxsw_sp_neigh_entry *neigh_entry)
769 {
770 	rhashtable_remove_fast(&mlxsw_sp->router.neigh_ht,
771 			       &neigh_entry->ht_node,
772 			       mlxsw_sp_neigh_ht_params);
773 }
774 
775 static struct mlxsw_sp_neigh_entry *
776 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
777 {
778 	struct mlxsw_sp_neigh_entry *neigh_entry;
779 	struct mlxsw_sp_rif *rif;
780 	int err;
781 
782 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
783 	if (!rif)
784 		return ERR_PTR(-EINVAL);
785 
786 	neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
787 	if (!neigh_entry)
788 		return ERR_PTR(-ENOMEM);
789 
790 	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
791 	if (err)
792 		goto err_neigh_entry_insert;
793 
794 	list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
795 
796 	return neigh_entry;
797 
798 err_neigh_entry_insert:
799 	mlxsw_sp_neigh_entry_free(neigh_entry);
800 	return ERR_PTR(err);
801 }
802 
803 static void
804 mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
805 			     struct mlxsw_sp_neigh_entry *neigh_entry)
806 {
807 	list_del(&neigh_entry->rif_list_node);
808 	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
809 	mlxsw_sp_neigh_entry_free(neigh_entry);
810 }
811 
812 static struct mlxsw_sp_neigh_entry *
813 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
814 {
815 	struct mlxsw_sp_neigh_key key;
816 
817 	key.n = n;
818 	return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht,
819 				      &key, mlxsw_sp_neigh_ht_params);
820 }
821 
822 static void
823 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
824 {
825 	unsigned long interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
826 
827 	mlxsw_sp->router.neighs_update.interval = jiffies_to_msecs(interval);
828 }
829 
830 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
831 						   char *rauhtd_pl,
832 						   int ent_index)
833 {
834 	struct net_device *dev;
835 	struct neighbour *n;
836 	__be32 dipn;
837 	u32 dip;
838 	u16 rif;
839 
840 	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
841 
842 	if (!mlxsw_sp->rifs[rif]) {
843 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
844 		return;
845 	}
846 
847 	dipn = htonl(dip);
848 	dev = mlxsw_sp->rifs[rif]->dev;
849 	n = neigh_lookup(&arp_tbl, &dipn, dev);
850 	if (!n) {
851 		netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n",
852 			   &dip);
853 		return;
854 	}
855 
856 	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
857 	neigh_event_send(n, NULL);
858 	neigh_release(n);
859 }
860 
861 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
862 						   char *rauhtd_pl,
863 						   int rec_index)
864 {
865 	u8 num_entries;
866 	int i;
867 
868 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
869 								rec_index);
870 	/* Hardware starts counting at 0, so add 1. */
871 	num_entries++;
872 
873 	/* Each record consists of several neighbour entries. */
874 	for (i = 0; i < num_entries; i++) {
875 		int ent_index;
876 
877 		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
878 		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
879 						       ent_index);
880 	}
}
883 
884 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
885 					      char *rauhtd_pl, int rec_index)
886 {
887 	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
888 	case MLXSW_REG_RAUHTD_TYPE_IPV4:
889 		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
890 						       rec_index);
891 		break;
892 	case MLXSW_REG_RAUHTD_TYPE_IPV6:
893 		WARN_ON_ONCE(1);
894 		break;
895 	}
896 }
897 
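/* A RAUHTD dump is considered full, and therefore worth re-issuing, when
 * the maximum number of records was returned and the last record is
 * either IPv6 or a completely filled IPv4 record.
 */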
898 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
899 {
900 	u8 num_rec, last_rec_index, num_entries;
901 
902 	num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
903 	last_rec_index = num_rec - 1;
904 
905 	if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
906 		return false;
907 	if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
908 	    MLXSW_REG_RAUHTD_TYPE_IPV6)
909 		return true;
910 
911 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
912 								last_rec_index);
913 	if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
914 		return true;
915 	return false;
916 }
917 
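/* Dump active neighbour entries via RAUHTD and feed the activity back to
 * the kernel, so that neighbours used for hardware forwarding are not
 * aged out. The dump is re-issued for as long as the previous response
 * was full.
 */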
918 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
919 {
920 	char *rauhtd_pl;
921 	u8 num_rec;
922 	int i, err;
923 
924 	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
925 	if (!rauhtd_pl)
926 		return -ENOMEM;
927 
928 	/* Make sure the neighbour's netdev isn't removed in the
929 	 * process.
930 	 */
931 	rtnl_lock();
932 	do {
933 		mlxsw_reg_rauhtd_pack(rauhtd_pl, MLXSW_REG_RAUHTD_TYPE_IPV4);
934 		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
935 				      rauhtd_pl);
936 		if (err) {
			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
938 			break;
939 		}
940 		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
941 		for (i = 0; i < num_rec; i++)
942 			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
943 							  i);
944 	} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
945 	rtnl_unlock();
946 
947 	kfree(rauhtd_pl);
948 	return err;
949 }
950 
951 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
952 {
953 	struct mlxsw_sp_neigh_entry *neigh_entry;
954 
	/* Take the RTNL mutex here to prevent the lists from changing */
956 	rtnl_lock();
957 	list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
958 			    nexthop_neighs_list_node)
		/* If this neigh has nexthops, make the kernel think it is
		 * active regardless of the traffic.
		 */
962 		neigh_event_send(neigh_entry->key.n, NULL);
963 	rtnl_unlock();
964 }
965 
966 static void
967 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
968 {
969 	unsigned long interval = mlxsw_sp->router.neighs_update.interval;
970 
971 	mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw,
972 			       msecs_to_jiffies(interval));
973 }
974 
975 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
976 {
977 	struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
978 						 router.neighs_update.dw.work);
979 	int err;
980 
981 	err = mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp);
982 	if (err)
		dev_err(mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity\n");
984 
985 	mlxsw_sp_router_neighs_update_nh(mlxsw_sp);
986 
987 	mlxsw_sp_router_neighs_update_work_schedule(mlxsw_sp);
988 }
989 
990 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
991 {
992 	struct mlxsw_sp_neigh_entry *neigh_entry;
993 	struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
994 						 router.nexthop_probe_dw.work);
995 
	/* Iterate over the nexthop neighbours and send ARP requests to those
	 * that are unresolved. This solves a chicken-and-egg problem: a
	 * nexthop is not offloaded until its neighbour is resolved, but the
	 * neighbour is never resolved if traffic flows in HW using a
	 * different nexthop.
	 *
	 * Take the RTNL mutex here to prevent the lists from changing.
	 */
1004 	rtnl_lock();
1005 	list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
1006 			    nexthop_neighs_list_node)
1007 		if (!neigh_entry->connected)
1008 			neigh_event_send(neigh_entry->key.n, NULL);
1009 	rtnl_unlock();
1010 
1011 	mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw,
1012 			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
1013 }
1014 
1015 static void
1016 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
1017 			      struct mlxsw_sp_neigh_entry *neigh_entry,
1018 			      bool removing);
1019 
1020 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
1021 {
1022 	return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
1023 			MLXSW_REG_RAUHT_OP_WRITE_DELETE;
1024 }
1025 
1026 static void
1027 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
1028 				struct mlxsw_sp_neigh_entry *neigh_entry,
1029 				enum mlxsw_reg_rauht_op op)
1030 {
1031 	struct neighbour *n = neigh_entry->key.n;
1032 	u32 dip = ntohl(*((__be32 *) n->primary_key));
1033 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
1034 
1035 	mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
1036 			      dip);
1037 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
1038 }
1039 
1040 static void
1041 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
1042 			    struct mlxsw_sp_neigh_entry *neigh_entry,
1043 			    bool adding)
1044 {
1045 	if (!adding && !neigh_entry->connected)
1046 		return;
1047 	neigh_entry->connected = adding;
1048 	if (neigh_entry->key.n->tbl == &arp_tbl)
1049 		mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
1050 						mlxsw_sp_rauht_op(adding));
1051 	else
1052 		WARN_ON_ONCE(1);
1053 }
1054 
1055 struct mlxsw_sp_neigh_event_work {
1056 	struct work_struct work;
1057 	struct mlxsw_sp *mlxsw_sp;
1058 	struct neighbour *n;
1059 };
1060 
1061 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
1062 {
1063 	struct mlxsw_sp_neigh_event_work *neigh_work =
1064 		container_of(work, struct mlxsw_sp_neigh_event_work, work);
1065 	struct mlxsw_sp *mlxsw_sp = neigh_work->mlxsw_sp;
1066 	struct mlxsw_sp_neigh_entry *neigh_entry;
1067 	struct neighbour *n = neigh_work->n;
1068 	unsigned char ha[ETH_ALEN];
1069 	bool entry_connected;
1070 	u8 nud_state, dead;
1071 
1072 	/* If these parameters are changed after we release the lock,
1073 	 * then we are guaranteed to receive another event letting us
1074 	 * know about it.
1075 	 */
1076 	read_lock_bh(&n->lock);
1077 	memcpy(ha, n->ha, ETH_ALEN);
1078 	nud_state = n->nud_state;
1079 	dead = n->dead;
1080 	read_unlock_bh(&n->lock);
1081 
1082 	rtnl_lock();
1083 	entry_connected = nud_state & NUD_VALID && !dead;
1084 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
1085 	if (!entry_connected && !neigh_entry)
1086 		goto out;
1087 	if (!neigh_entry) {
1088 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
1089 		if (IS_ERR(neigh_entry))
1090 			goto out;
1091 	}
1092 
1093 	memcpy(neigh_entry->ha, ha, ETH_ALEN);
1094 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
1095 	mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);
1096 
1097 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
1098 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
1099 
1100 out:
1101 	rtnl_unlock();
1102 	neigh_release(n);
1103 	kfree(neigh_work);
1104 }
1105 
1106 int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
1107 				   unsigned long event, void *ptr)
1108 {
1109 	struct mlxsw_sp_neigh_event_work *neigh_work;
1110 	struct mlxsw_sp_port *mlxsw_sp_port;
1111 	struct mlxsw_sp *mlxsw_sp;
1112 	unsigned long interval;
1113 	struct neigh_parms *p;
1114 	struct neighbour *n;
1115 
1116 	switch (event) {
1117 	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
1118 		p = ptr;
1119 
1120 		/* We don't care about changes in the default table. */
1121 		if (!p->dev || p->tbl != &arp_tbl)
1122 			return NOTIFY_DONE;
1123 
1124 		/* We are in atomic context and can't take RTNL mutex,
1125 		 * so use RCU variant to walk the device chain.
1126 		 */
1127 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
1128 		if (!mlxsw_sp_port)
1129 			return NOTIFY_DONE;
1130 
1131 		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
1132 		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
1133 		mlxsw_sp->router.neighs_update.interval = interval;
1134 
1135 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
1136 		break;
1137 	case NETEVENT_NEIGH_UPDATE:
1138 		n = ptr;
1139 
1140 		if (n->tbl != &arp_tbl)
1141 			return NOTIFY_DONE;
1142 
1143 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
1144 		if (!mlxsw_sp_port)
1145 			return NOTIFY_DONE;
1146 
1147 		neigh_work = kzalloc(sizeof(*neigh_work), GFP_ATOMIC);
1148 		if (!neigh_work) {
1149 			mlxsw_sp_port_dev_put(mlxsw_sp_port);
1150 			return NOTIFY_BAD;
1151 		}
1152 
1153 		INIT_WORK(&neigh_work->work, mlxsw_sp_router_neigh_event_work);
1154 		neigh_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
1155 		neigh_work->n = n;
1156 
1157 		/* Take a reference to ensure the neighbour won't be
1158 		 * destructed until we drop the reference in delayed
1159 		 * work.
1160 		 */
1161 		neigh_clone(n);
1162 		mlxsw_core_schedule_work(&neigh_work->work);
1163 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
1164 		break;
1165 	}
1166 
1167 	return NOTIFY_DONE;
1168 }
1169 
1170 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
1171 {
1172 	int err;
1173 
1174 	err = rhashtable_init(&mlxsw_sp->router.neigh_ht,
1175 			      &mlxsw_sp_neigh_ht_params);
1176 	if (err)
1177 		return err;
1178 
1179 	/* Initialize the polling interval according to the default
1180 	 * table.
1181 	 */
1182 	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
1183 
	/* Create the delayed works for the activity update */
1185 	INIT_DELAYED_WORK(&mlxsw_sp->router.neighs_update.dw,
1186 			  mlxsw_sp_router_neighs_update_work);
1187 	INIT_DELAYED_WORK(&mlxsw_sp->router.nexthop_probe_dw,
1188 			  mlxsw_sp_router_probe_unresolved_nexthops);
1189 	mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, 0);
1190 	mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, 0);
1191 	return 0;
1192 }
1193 
1194 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
1195 {
1196 	cancel_delayed_work_sync(&mlxsw_sp->router.neighs_update.dw);
1197 	cancel_delayed_work_sync(&mlxsw_sp->router.nexthop_probe_dw);
1198 	rhashtable_destroy(&mlxsw_sp->router.neigh_ht);
1199 }
1200 
1201 static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp,
1202 				    const struct mlxsw_sp_rif *rif)
1203 {
1204 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
1205 
1206 	mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL,
1207 			     rif->rif_index, rif->addr);
1208 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
1209 }
1210 
1211 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
1212 					 struct mlxsw_sp_rif *rif)
1213 {
1214 	struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
1215 
1216 	mlxsw_sp_neigh_rif_flush(mlxsw_sp, rif);
1217 	list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
1218 				 rif_list_node)
1219 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
1220 }
1221 
1222 struct mlxsw_sp_nexthop_key {
1223 	struct fib_nh *fib_nh;
1224 };
1225 
1226 struct mlxsw_sp_nexthop {
1227 	struct list_head neigh_list_node; /* member of neigh entry list */
1228 	struct list_head rif_list_node;
1229 	struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
1230 						* this belongs to
1231 						*/
1232 	struct rhash_head ht_node;
1233 	struct mlxsw_sp_nexthop_key key;
1234 	struct mlxsw_sp_rif *rif;
1235 	u8 should_offload:1, /* set indicates this neigh is connected and
1236 			      * should be put to KVD linear area of this group.
1237 			      */
1238 	   offloaded:1, /* set in case the neigh is actually put into
1239 			 * KVD linear area of this group.
1240 			 */
1241 	   update:1; /* set indicates that MAC of this neigh should be
1242 		      * updated in HW
1243 		      */
1244 	struct mlxsw_sp_neigh_entry *neigh_entry;
1245 };
1246 
1247 struct mlxsw_sp_nexthop_group_key {
1248 	struct fib_info *fi;
1249 };
1250 
1251 struct mlxsw_sp_nexthop_group {
1252 	struct rhash_head ht_node;
1253 	struct list_head fib_list; /* list of fib entries that use this group */
1254 	struct mlxsw_sp_nexthop_group_key key;
1255 	u8 adj_index_valid:1,
1256 	   gateway:1; /* routes using the group use a gateway */
1257 	u32 adj_index;
1258 	u16 ecmp_size;
1259 	u16 count;
1260 	struct mlxsw_sp_nexthop nexthops[0];
1261 #define nh_rif	nexthops[0].rif
1262 };
1263 
1264 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
1265 	.key_offset = offsetof(struct mlxsw_sp_nexthop_group, key),
1266 	.head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
1267 	.key_len = sizeof(struct mlxsw_sp_nexthop_group_key),
1268 };
1269 
1270 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
1271 					 struct mlxsw_sp_nexthop_group *nh_grp)
1272 {
1273 	return rhashtable_insert_fast(&mlxsw_sp->router.nexthop_group_ht,
1274 				      &nh_grp->ht_node,
1275 				      mlxsw_sp_nexthop_group_ht_params);
1276 }
1277 
1278 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
1279 					  struct mlxsw_sp_nexthop_group *nh_grp)
1280 {
1281 	rhashtable_remove_fast(&mlxsw_sp->router.nexthop_group_ht,
1282 			       &nh_grp->ht_node,
1283 			       mlxsw_sp_nexthop_group_ht_params);
1284 }
1285 
1286 static struct mlxsw_sp_nexthop_group *
1287 mlxsw_sp_nexthop_group_lookup(struct mlxsw_sp *mlxsw_sp,
1288 			      struct mlxsw_sp_nexthop_group_key key)
1289 {
1290 	return rhashtable_lookup_fast(&mlxsw_sp->router.nexthop_group_ht, &key,
1291 				      mlxsw_sp_nexthop_group_ht_params);
1292 }
1293 
1294 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
1295 	.key_offset = offsetof(struct mlxsw_sp_nexthop, key),
1296 	.head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
1297 	.key_len = sizeof(struct mlxsw_sp_nexthop_key),
1298 };
1299 
1300 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
1301 				   struct mlxsw_sp_nexthop *nh)
1302 {
1303 	return rhashtable_insert_fast(&mlxsw_sp->router.nexthop_ht,
1304 				      &nh->ht_node, mlxsw_sp_nexthop_ht_params);
1305 }
1306 
1307 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
1308 				    struct mlxsw_sp_nexthop *nh)
1309 {
1310 	rhashtable_remove_fast(&mlxsw_sp->router.nexthop_ht, &nh->ht_node,
1311 			       mlxsw_sp_nexthop_ht_params);
1312 }
1313 
1314 static struct mlxsw_sp_nexthop *
1315 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
1316 			struct mlxsw_sp_nexthop_key key)
1317 {
1318 	return rhashtable_lookup_fast(&mlxsw_sp->router.nexthop_ht, &key,
1319 				      mlxsw_sp_nexthop_ht_params);
1320 }
1321 
1322 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
1323 					     const struct mlxsw_sp_fib *fib,
1324 					     u32 adj_index, u16 ecmp_size,
1325 					     u32 new_adj_index,
1326 					     u16 new_ecmp_size)
1327 {
1328 	char raleu_pl[MLXSW_REG_RALEU_LEN];
1329 
1330 	mlxsw_reg_raleu_pack(raleu_pl,
1331 			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
1332 			     fib->vr->id, adj_index, ecmp_size, new_adj_index,
1333 			     new_ecmp_size);
1334 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
1335 }
1336 
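/* After a nexthop group was moved in the adjacency table, use RALEU to
 * rewrite, per virtual router, all routes pointing at the old adjacency
 * index so that they use the new one.
 */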
1337 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
1338 					  struct mlxsw_sp_nexthop_group *nh_grp,
1339 					  u32 old_adj_index, u16 old_ecmp_size)
1340 {
1341 	struct mlxsw_sp_fib_entry *fib_entry;
1342 	struct mlxsw_sp_fib *fib = NULL;
1343 	int err;
1344 
1345 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
1346 		if (fib == fib_entry->fib_node->fib)
1347 			continue;
1348 		fib = fib_entry->fib_node->fib;
1349 		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
1350 							old_adj_index,
1351 							old_ecmp_size,
1352 							nh_grp->adj_index,
1353 							nh_grp->ecmp_size);
1354 		if (err)
1355 			return err;
1356 	}
1357 	return 0;
1358 }
1359 
1360 static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
1361 				       struct mlxsw_sp_nexthop *nh)
1362 {
1363 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
1364 	char ratr_pl[MLXSW_REG_RATR_LEN];
1365 
1366 	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
1367 			    true, adj_index, neigh_entry->rif);
1368 	mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
1369 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
1370 }
1371 
1372 static int
1373 mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp,
1374 				  struct mlxsw_sp_nexthop_group *nh_grp,
1375 				  bool reallocate)
1376 {
1377 	u32 adj_index = nh_grp->adj_index; /* base */
1378 	struct mlxsw_sp_nexthop *nh;
1379 	int i;
1380 	int err;
1381 
1382 	for (i = 0; i < nh_grp->count; i++) {
1383 		nh = &nh_grp->nexthops[i];
1384 
1385 		if (!nh->should_offload) {
1386 			nh->offloaded = 0;
1387 			continue;
1388 		}
1389 
1390 		if (nh->update || reallocate) {
1391 			err = mlxsw_sp_nexthop_mac_update(mlxsw_sp,
1392 							  adj_index, nh);
1393 			if (err)
1394 				return err;
1395 			nh->update = 0;
1396 			nh->offloaded = 1;
1397 		}
1398 		adj_index++;
1399 	}
1400 	return 0;
1401 }
1402 
1403 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1404 				     struct mlxsw_sp_fib_entry *fib_entry);
1405 
1406 static int
1407 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
1408 				    struct mlxsw_sp_nexthop_group *nh_grp)
1409 {
1410 	struct mlxsw_sp_fib_entry *fib_entry;
1411 	int err;
1412 
1413 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
1414 		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1415 		if (err)
1416 			return err;
1417 	}
1418 	return 0;
1419 }
1420 
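/* Recompute the group's adjacency entries after a nexthop changed state.
 * For groups without a gateway only the FIB entries are refreshed. If
 * just MACs changed, the existing entries are updated in place. If the
 * set of offloadable nexthops changed, a new KVD linear block of the new
 * ECMP size is allocated, populated and the routes are migrated to it.
 * If nothing can be offloaded, or on error, fall back to trapping
 * packets to the kernel.
 */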
1421 static void
1422 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
1423 			       struct mlxsw_sp_nexthop_group *nh_grp)
1424 {
1425 	struct mlxsw_sp_nexthop *nh;
1426 	bool offload_change = false;
1427 	u32 adj_index;
1428 	u16 ecmp_size = 0;
1429 	bool old_adj_index_valid;
1430 	u32 old_adj_index;
1431 	u16 old_ecmp_size;
1432 	int i;
1433 	int err;
1434 
1435 	if (!nh_grp->gateway) {
1436 		mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
1437 		return;
1438 	}
1439 
1440 	for (i = 0; i < nh_grp->count; i++) {
1441 		nh = &nh_grp->nexthops[i];
1442 
1443 		if (nh->should_offload ^ nh->offloaded) {
1444 			offload_change = true;
1445 			if (nh->should_offload)
1446 				nh->update = 1;
1447 		}
1448 		if (nh->should_offload)
1449 			ecmp_size++;
1450 	}
1451 	if (!offload_change) {
1452 		/* Nothing was added or removed, so no need to reallocate. Just
1453 		 * update MAC on existing adjacency indexes.
1454 		 */
1455 		err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp,
1456 							false);
1457 		if (err) {
1458 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
1459 			goto set_trap;
1460 		}
1461 		return;
1462 	}
	if (!ecmp_size)
		/* No neigh of this group is connected, so just set
		 * the trap and let everything flow through the kernel.
		 */
		goto set_trap;
1468 
1469 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size, &adj_index);
1470 	if (err) {
		/* We ran out of KVD linear space, so just set the
		 * trap and let everything flow through the kernel.
		 */
1474 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
1475 		goto set_trap;
1476 	}
1477 	old_adj_index_valid = nh_grp->adj_index_valid;
1478 	old_adj_index = nh_grp->adj_index;
1479 	old_ecmp_size = nh_grp->ecmp_size;
1480 	nh_grp->adj_index_valid = 1;
1481 	nh_grp->adj_index = adj_index;
1482 	nh_grp->ecmp_size = ecmp_size;
1483 	err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp, true);
1484 	if (err) {
1485 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
1486 		goto set_trap;
1487 	}
1488 
1489 	if (!old_adj_index_valid) {
		/* The trap was set for the fib entries, so we have to call
		 * fib entry update to unset it and use the adjacency index.
		 */
1493 		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
1494 		if (err) {
1495 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
1496 			goto set_trap;
1497 		}
1498 		return;
1499 	}
1500 
1501 	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
1502 					     old_adj_index, old_ecmp_size);
1503 	mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
1504 	if (err) {
1505 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
1506 		goto set_trap;
1507 	}
1508 	return;
1509 
1510 set_trap:
1511 	old_adj_index_valid = nh_grp->adj_index_valid;
1512 	nh_grp->adj_index_valid = 0;
1513 	for (i = 0; i < nh_grp->count; i++) {
1514 		nh = &nh_grp->nexthops[i];
1515 		nh->offloaded = 0;
1516 	}
1517 	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
1518 	if (err)
1519 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
1520 	if (old_adj_index_valid)
1521 		mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
1522 }
1523 
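/* Update a single nexthop's offload eligibility following a neighbour
 * state change and mark it so that its adjacency entry is rewritten on
 * the next group refresh.
 */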
1524 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
1525 					    bool removing)
1526 {
1527 	if (!removing && !nh->should_offload)
1528 		nh->should_offload = 1;
1529 	else if (removing && nh->offloaded)
1530 		nh->should_offload = 0;
1531 	nh->update = 1;
1532 }
1533 
1534 static void
1535 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
1536 			      struct mlxsw_sp_neigh_entry *neigh_entry,
1537 			      bool removing)
1538 {
1539 	struct mlxsw_sp_nexthop *nh;
1540 
1541 	list_for_each_entry(nh, &neigh_entry->nexthop_list,
1542 			    neigh_list_node) {
1543 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
1544 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
1545 	}
1546 }
1547 
1548 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
1549 				      struct mlxsw_sp_rif *rif)
1550 {
1551 	if (nh->rif)
1552 		return;
1553 
1554 	nh->rif = rif;
1555 	list_add(&nh->rif_list_node, &rif->nexthop_list);
1556 }
1557 
1558 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
1559 {
1560 	if (!nh->rif)
1561 		return;
1562 
1563 	list_del(&nh->rif_list_node);
1564 	nh->rif = NULL;
1565 }
1566 
1567 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
1568 				       struct mlxsw_sp_nexthop *nh)
1569 {
1570 	struct mlxsw_sp_neigh_entry *neigh_entry;
1571 	struct fib_nh *fib_nh = nh->key.fib_nh;
1572 	struct neighbour *n;
1573 	u8 nud_state, dead;
1574 	int err;
1575 
1576 	if (!nh->nh_grp->gateway || nh->neigh_entry)
1577 		return 0;
1578 
	/* Take a reference on the neigh here, ensuring that it will
	 * not be destructed before the nexthop entry is finished with it.
	 * The reference is taken either in neigh_lookup() or
	 * in neigh_create() in case n is not found.
	 */
1584 	n = neigh_lookup(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev);
1585 	if (!n) {
1586 		n = neigh_create(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev);
1587 		if (IS_ERR(n))
1588 			return PTR_ERR(n);
1589 		neigh_event_send(n, NULL);
1590 	}
1591 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
1592 	if (!neigh_entry) {
1593 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
1594 		if (IS_ERR(neigh_entry)) {
1595 			err = -EINVAL;
1596 			goto err_neigh_entry_create;
1597 		}
1598 	}
1599 
1600 	/* If that is the first nexthop connected to that neigh, add to
1601 	 * nexthop_neighs_list
1602 	 */
1603 	if (list_empty(&neigh_entry->nexthop_list))
1604 		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
1605 			      &mlxsw_sp->router.nexthop_neighs_list);
1606 
1607 	nh->neigh_entry = neigh_entry;
1608 	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
1609 	read_lock_bh(&n->lock);
1610 	nud_state = n->nud_state;
1611 	dead = n->dead;
1612 	read_unlock_bh(&n->lock);
1613 	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
1614 
1615 	return 0;
1616 
1617 err_neigh_entry_create:
1618 	neigh_release(n);
1619 	return err;
1620 }
1621 
1622 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
1623 					struct mlxsw_sp_nexthop *nh)
1624 {
1625 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
1626 	struct neighbour *n;
1627 
1628 	if (!neigh_entry)
1629 		return;
1630 	n = neigh_entry->key.n;
1631 
1632 	__mlxsw_sp_nexthop_neigh_update(nh, true);
1633 	list_del(&nh->neigh_list_node);
1634 	nh->neigh_entry = NULL;
1635 
1636 	/* If that is the last nexthop connected to that neigh, remove from
1637 	 * nexthop_neighs_list
1638 	 */
1639 	if (list_empty(&neigh_entry->nexthop_list))
1640 		list_del(&neigh_entry->nexthop_neighs_list_node);
1641 
1642 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
1643 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
1644 
1645 	neigh_release(n);
1646 }
1647 
1648 static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
1649 				 struct mlxsw_sp_nexthop_group *nh_grp,
1650 				 struct mlxsw_sp_nexthop *nh,
1651 				 struct fib_nh *fib_nh)
1652 {
1653 	struct net_device *dev = fib_nh->nh_dev;
1654 	struct in_device *in_dev;
1655 	struct mlxsw_sp_rif *rif;
1656 	int err;
1657 
1658 	nh->nh_grp = nh_grp;
1659 	nh->key.fib_nh = fib_nh;
1660 	err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
1661 	if (err)
1662 		return err;
1663 
1664 	if (!dev)
1665 		return 0;
1666 
1667 	in_dev = __in_dev_get_rtnl(dev);
1668 	if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
1669 	    fib_nh->nh_flags & RTNH_F_LINKDOWN)
1670 		return 0;
1671 
1672 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
1673 	if (!rif)
1674 		return 0;
1675 	mlxsw_sp_nexthop_rif_init(nh, rif);
1676 
1677 	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
1678 	if (err)
1679 		goto err_nexthop_neigh_init;
1680 
1681 	return 0;
1682 
1683 err_nexthop_neigh_init:
1684 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
1685 	return err;
1686 }
1687 
1688 static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp,
1689 				  struct mlxsw_sp_nexthop *nh)
1690 {
1691 	mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
1692 	mlxsw_sp_nexthop_rif_fini(nh);
1693 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
1694 }
1695 
1696 static void mlxsw_sp_nexthop_event(struct mlxsw_sp *mlxsw_sp,
1697 				   unsigned long event, struct fib_nh *fib_nh)
1698 {
1699 	struct mlxsw_sp_nexthop_key key;
1700 	struct mlxsw_sp_nexthop *nh;
1701 	struct mlxsw_sp_rif *rif;
1702 
1703 	if (mlxsw_sp->router.aborted)
1704 		return;
1705 
1706 	key.fib_nh = fib_nh;
1707 	nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
1708 	if (WARN_ON_ONCE(!nh))
1709 		return;
1710 
1711 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, fib_nh->nh_dev);
1712 	if (!rif)
1713 		return;
1714 
1715 	switch (event) {
1716 	case FIB_EVENT_NH_ADD:
1717 		mlxsw_sp_nexthop_rif_init(nh, rif);
1718 		mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
1719 		break;
1720 	case FIB_EVENT_NH_DEL:
1721 		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
1722 		mlxsw_sp_nexthop_rif_fini(nh);
1723 		break;
1724 	}
1725 
1726 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
1727 }
1728 
1729 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
1730 					   struct mlxsw_sp_rif *rif)
1731 {
1732 	struct mlxsw_sp_nexthop *nh, *tmp;
1733 
1734 	list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
1735 		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
1736 		mlxsw_sp_nexthop_rif_fini(nh);
1737 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
1738 	}
1739 }
1740 
1741 static struct mlxsw_sp_nexthop_group *
1742 mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
1743 {
1744 	struct mlxsw_sp_nexthop_group *nh_grp;
1745 	struct mlxsw_sp_nexthop *nh;
1746 	struct fib_nh *fib_nh;
1747 	size_t alloc_size;
1748 	int i;
1749 	int err;
1750 
1751 	alloc_size = sizeof(*nh_grp) +
1752 		     fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
1753 	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
1754 	if (!nh_grp)
1755 		return ERR_PTR(-ENOMEM);
1756 	INIT_LIST_HEAD(&nh_grp->fib_list);
1757 	nh_grp->gateway = fi->fib_nh->nh_scope == RT_SCOPE_LINK;
1758 	nh_grp->count = fi->fib_nhs;
1759 	nh_grp->key.fi = fi;
1760 	for (i = 0; i < nh_grp->count; i++) {
1761 		nh = &nh_grp->nexthops[i];
1762 		fib_nh = &fi->fib_nh[i];
1763 		err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh);
1764 		if (err)
1765 			goto err_nexthop_init;
1766 	}
1767 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
1768 	if (err)
1769 		goto err_nexthop_group_insert;
1770 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
1771 	return nh_grp;
1772 
1773 err_nexthop_group_insert:
1774 err_nexthop_init:
1775 	for (i--; i >= 0; i--) {
1776 		nh = &nh_grp->nexthops[i];
1777 		mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
1778 	}
1779 	kfree(nh_grp);
1780 	return ERR_PTR(err);
1781 }
1782 
1783 static void
1784 mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp,
1785 			       struct mlxsw_sp_nexthop_group *nh_grp)
1786 {
1787 	struct mlxsw_sp_nexthop *nh;
1788 	int i;
1789 
1790 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
1791 	for (i = 0; i < nh_grp->count; i++) {
1792 		nh = &nh_grp->nexthops[i];
1793 		mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
1794 	}
1795 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
1796 	WARN_ON_ONCE(nh_grp->adj_index_valid);
1797 	kfree(nh_grp);
1798 }
1799 
1800 static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp,
1801 				      struct mlxsw_sp_fib_entry *fib_entry,
1802 				      struct fib_info *fi)
1803 {
1804 	struct mlxsw_sp_nexthop_group_key key;
1805 	struct mlxsw_sp_nexthop_group *nh_grp;
1806 
1807 	key.fi = fi;
1808 	nh_grp = mlxsw_sp_nexthop_group_lookup(mlxsw_sp, key);
1809 	if (!nh_grp) {
1810 		nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi);
1811 		if (IS_ERR(nh_grp))
1812 			return PTR_ERR(nh_grp);
1813 	}
1814 	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
1815 	fib_entry->nh_group = nh_grp;
1816 	return 0;
1817 }
1818 
1819 static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp,
1820 				       struct mlxsw_sp_fib_entry *fib_entry)
1821 {
1822 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
1823 
1824 	list_del(&fib_entry->nexthop_group_node);
1825 	if (!list_empty(&nh_grp->fib_list))
1826 		return;
1827 	mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp);
1828 }
1829 
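/* A route can only be offloaded if it does not specify a ToS and its
 * nexthop group is usable: remote (gateway) routes require a valid
 * adjacency index, while local routes require an associated RIF.
 */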
1830 static bool
1831 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
1832 {
1833 	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
1834 
1835 	if (fib_entry->params.tos)
1836 		return false;
1837 
1838 	switch (fib_entry->type) {
1839 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
1840 		return !!nh_group->adj_index_valid;
1841 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
1842 		return !!nh_group->nh_rif;
1843 	default:
1844 		return false;
1845 	}
1846 }
1847 
1848 static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
1849 {
1850 	fib_entry->offloaded = true;
1851 
1852 	switch (fib_entry->fib_node->fib->proto) {
1853 	case MLXSW_SP_L3_PROTO_IPV4:
1854 		fib_info_offload_inc(fib_entry->nh_group->key.fi);
1855 		break;
1856 	case MLXSW_SP_L3_PROTO_IPV6:
1857 		WARN_ON_ONCE(1);
1858 	}
1859 }
1860 
1861 static void
1862 mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
1863 {
1864 	switch (fib_entry->fib_node->fib->proto) {
1865 	case MLXSW_SP_L3_PROTO_IPV4:
1866 		fib_info_offload_dec(fib_entry->nh_group->key.fi);
1867 		break;
1868 	case MLXSW_SP_L3_PROTO_IPV6:
1869 		WARN_ON_ONCE(1);
1870 	}
1871 
1872 	fib_entry->offloaded = false;
1873 }
1874 
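/* Keep the kernel's view of the route in sync with the device: set
 * the offload indication after a successful write and clear it after
 * a delete or once the entry can no longer be offloaded.
 */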
1875 static void
1876 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
1877 				   enum mlxsw_reg_ralue_op op, int err)
1878 {
1879 	switch (op) {
1880 	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
1881 		if (!fib_entry->offloaded)
1882 			return;
1883 		return mlxsw_sp_fib_entry_offload_unset(fib_entry);
1884 	case MLXSW_REG_RALUE_OP_WRITE_WRITE:
1885 		if (err)
1886 			return;
1887 		if (mlxsw_sp_fib_entry_should_offload(fib_entry) &&
1888 		    !fib_entry->offloaded)
1889 			mlxsw_sp_fib_entry_offload_set(fib_entry);
1890 		else if (!mlxsw_sp_fib_entry_should_offload(fib_entry) &&
1891 			 fib_entry->offloaded)
1892 			mlxsw_sp_fib_entry_offload_unset(fib_entry);
1893 		return;
1894 	default:
1895 		return;
1896 	}
1897 }
1898 
1899 static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp,
1900 					 struct mlxsw_sp_fib_entry *fib_entry,
1901 					 enum mlxsw_reg_ralue_op op)
1902 {
1903 	char ralue_pl[MLXSW_REG_RALUE_LEN];
1904 	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
1905 	u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
1906 	enum mlxsw_reg_ralue_trap_action trap_action;
1907 	u16 trap_id = 0;
1908 	u32 adjacency_index = 0;
1909 	u16 ecmp_size = 0;
1910 
	/* In case the nexthop group adjacency index is valid, use it
	 * with the provided ECMP size. Otherwise, set up a trap and pass
	 * traffic to the kernel.
	 */
1915 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
1916 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
1917 		adjacency_index = fib_entry->nh_group->adj_index;
1918 		ecmp_size = fib_entry->nh_group->ecmp_size;
1919 	} else {
1920 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
1921 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
1922 	}
1923 
1924 	mlxsw_reg_ralue_pack4(ralue_pl,
1925 			      (enum mlxsw_reg_ralxx_protocol) fib->proto, op,
1926 			      fib->vr->id, fib_entry->fib_node->key.prefix_len,
1927 			      *p_dip);
1928 	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
1929 					adjacency_index, ecmp_size);
1930 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
1931 }
1932 
1933 static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp,
1934 					struct mlxsw_sp_fib_entry *fib_entry,
1935 					enum mlxsw_reg_ralue_op op)
1936 {
1937 	struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
1938 	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
1939 	enum mlxsw_reg_ralue_trap_action trap_action;
1940 	char ralue_pl[MLXSW_REG_RALUE_LEN];
1941 	u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
1942 	u16 trap_id = 0;
1943 	u16 rif_index = 0;
1944 
1945 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
1946 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
1947 		rif_index = rif->rif_index;
1948 	} else {
1949 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
1950 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
1951 	}
1952 
1953 	mlxsw_reg_ralue_pack4(ralue_pl,
1954 			      (enum mlxsw_reg_ralxx_protocol) fib->proto, op,
1955 			      fib->vr->id, fib_entry->fib_node->key.prefix_len,
1956 			      *p_dip);
1957 	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
1958 				       rif_index);
1959 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
1960 }
1961 
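/* Trap entries (e.g., broadcast and host routes) are programmed with
 * the ip2me action, so that matching packets are always delivered to
 * the CPU.
 */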
1962 static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp,
1963 				       struct mlxsw_sp_fib_entry *fib_entry,
1964 				       enum mlxsw_reg_ralue_op op)
1965 {
1966 	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
1967 	char ralue_pl[MLXSW_REG_RALUE_LEN];
1968 	u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
1969 
1970 	mlxsw_reg_ralue_pack4(ralue_pl,
1971 			      (enum mlxsw_reg_ralxx_protocol) fib->proto, op,
1972 			      fib->vr->id, fib_entry->fib_node->key.prefix_len,
1973 			      *p_dip);
1974 	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
1975 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
1976 }
1977 
1978 static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp,
1979 				  struct mlxsw_sp_fib_entry *fib_entry,
1980 				  enum mlxsw_reg_ralue_op op)
1981 {
1982 	switch (fib_entry->type) {
1983 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
1984 		return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op);
1985 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
1986 		return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op);
1987 	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
1988 		return mlxsw_sp_fib_entry_op4_trap(mlxsw_sp, fib_entry, op);
1989 	}
1990 	return -EINVAL;
1991 }
1992 
1993 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
1994 				 struct mlxsw_sp_fib_entry *fib_entry,
1995 				 enum mlxsw_reg_ralue_op op)
1996 {
1997 	int err = -EINVAL;
1998 
1999 	switch (fib_entry->fib_node->fib->proto) {
2000 	case MLXSW_SP_L3_PROTO_IPV4:
2001 		err = mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op);
2002 		break;
2003 	case MLXSW_SP_L3_PROTO_IPV6:
2004 		return err;
2005 	}
2006 	mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
2007 	return err;
2008 }
2009 
2010 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
2011 				     struct mlxsw_sp_fib_entry *fib_entry)
2012 {
2013 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
2014 				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
2015 }
2016 
2017 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
2018 				  struct mlxsw_sp_fib_entry *fib_entry)
2019 {
2020 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
2021 				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
2022 }
2023 
2024 static int
2025 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
2026 			     const struct fib_entry_notifier_info *fen_info,
2027 			     struct mlxsw_sp_fib_entry *fib_entry)
2028 {
2029 	struct fib_info *fi = fen_info->fi;
2030 
2031 	switch (fen_info->type) {
2032 	case RTN_BROADCAST: /* fall through */
2033 	case RTN_LOCAL:
2034 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
2035 		return 0;
2036 	case RTN_UNREACHABLE: /* fall through */
2037 	case RTN_BLACKHOLE: /* fall through */
2038 	case RTN_PROHIBIT:
		/* Packets hitting these routes need to be trapped, but
		 * this can be done at a lower priority than packets
		 * directed at the host, so use action type local
		 * instead of trap.
		 */
2043 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
2044 		return 0;
2045 	case RTN_UNICAST:
2046 		if (fi->fib_nh->nh_scope != RT_SCOPE_LINK)
2047 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
2048 		else
2049 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
2050 		return 0;
2051 	default:
2052 		return -EINVAL;
2053 	}
2054 }
2055 
2056 static struct mlxsw_sp_fib_entry *
2057 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
2058 			   struct mlxsw_sp_fib_node *fib_node,
2059 			   const struct fib_entry_notifier_info *fen_info)
2060 {
2061 	struct mlxsw_sp_fib_entry *fib_entry;
2062 	int err;
2063 
2064 	fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL);
2065 	if (!fib_entry) {
2066 		err = -ENOMEM;
2067 		goto err_fib_entry_alloc;
2068 	}
2069 
2070 	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
2071 	if (err)
2072 		goto err_fib4_entry_type_set;
2073 
2074 	err = mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fen_info->fi);
2075 	if (err)
2076 		goto err_nexthop_group_get;
2077 
2078 	fib_entry->params.prio = fen_info->fi->fib_priority;
2079 	fib_entry->params.tb_id = fen_info->tb_id;
2080 	fib_entry->params.type = fen_info->type;
2081 	fib_entry->params.tos = fen_info->tos;
2082 
2083 	fib_entry->fib_node = fib_node;
2084 
2085 	return fib_entry;
2086 
2087 err_nexthop_group_get:
2088 err_fib4_entry_type_set:
2089 	kfree(fib_entry);
2090 err_fib_entry_alloc:
2091 	return ERR_PTR(err);
2092 }
2093 
2094 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
2095 					struct mlxsw_sp_fib_entry *fib_entry)
2096 {
2097 	mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry);
2098 	kfree(fib_entry);
2099 }
2100 
2101 static struct mlxsw_sp_fib_node *
2102 mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp,
2103 		       const struct fib_entry_notifier_info *fen_info);
2104 
2105 static struct mlxsw_sp_fib_entry *
2106 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
2107 			   const struct fib_entry_notifier_info *fen_info)
2108 {
2109 	struct mlxsw_sp_fib_entry *fib_entry;
2110 	struct mlxsw_sp_fib_node *fib_node;
2111 
2112 	fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info);
2113 	if (IS_ERR(fib_node))
2114 		return NULL;
2115 
2116 	list_for_each_entry(fib_entry, &fib_node->entry_list, list) {
2117 		if (fib_entry->params.tb_id == fen_info->tb_id &&
2118 		    fib_entry->params.tos == fen_info->tos &&
2119 		    fib_entry->params.type == fen_info->type &&
2120 		    fib_entry->nh_group->key.fi == fen_info->fi) {
2121 			return fib_entry;
2122 		}
2123 	}
2124 
2125 	return NULL;
2126 }
2127 
2128 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
2129 	.key_offset = offsetof(struct mlxsw_sp_fib_node, key),
2130 	.head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
2131 	.key_len = sizeof(struct mlxsw_sp_fib_key),
2132 	.automatic_shrinking = true,
2133 };
2134 
2135 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
2136 				    struct mlxsw_sp_fib_node *fib_node)
2137 {
2138 	return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
2139 				      mlxsw_sp_fib_ht_params);
2140 }
2141 
2142 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
2143 				     struct mlxsw_sp_fib_node *fib_node)
2144 {
2145 	rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
2146 			       mlxsw_sp_fib_ht_params);
2147 }
2148 
2149 static struct mlxsw_sp_fib_node *
2150 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
2151 			 size_t addr_len, unsigned char prefix_len)
2152 {
2153 	struct mlxsw_sp_fib_key key;
2154 
2155 	memset(&key, 0, sizeof(key));
2156 	memcpy(key.addr, addr, addr_len);
2157 	key.prefix_len = prefix_len;
2158 	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
2159 }
2160 
2161 static struct mlxsw_sp_fib_node *
2162 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
2163 			 size_t addr_len, unsigned char prefix_len)
2164 {
2165 	struct mlxsw_sp_fib_node *fib_node;
2166 
2167 	fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
2168 	if (!fib_node)
2169 		return NULL;
2170 
2171 	INIT_LIST_HEAD(&fib_node->entry_list);
2172 	list_add(&fib_node->list, &fib->node_list);
2173 	memcpy(fib_node->key.addr, addr, addr_len);
2174 	fib_node->key.prefix_len = prefix_len;
2175 
2176 	return fib_node;
2177 }
2178 
2179 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
2180 {
2181 	list_del(&fib_node->list);
2182 	WARN_ON(!list_empty(&fib_node->entry_list));
2183 	kfree(fib_node);
2184 }
2185 
2186 static bool
2187 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
2188 				 const struct mlxsw_sp_fib_entry *fib_entry)
2189 {
2190 	return list_first_entry(&fib_node->entry_list,
2191 				struct mlxsw_sp_fib_entry, list) == fib_entry;
2192 }
2193 
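/* Reference count the prefix lengths in use, so that the FIB's prefix
 * usage map only reflects prefix lengths with at least one route.
 */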
2194 static void mlxsw_sp_fib_node_prefix_inc(struct mlxsw_sp_fib_node *fib_node)
2195 {
2196 	unsigned char prefix_len = fib_node->key.prefix_len;
2197 	struct mlxsw_sp_fib *fib = fib_node->fib;
2198 
2199 	if (fib->prefix_ref_count[prefix_len]++ == 0)
2200 		mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len);
2201 }
2202 
2203 static void mlxsw_sp_fib_node_prefix_dec(struct mlxsw_sp_fib_node *fib_node)
2204 {
2205 	unsigned char prefix_len = fib_node->key.prefix_len;
2206 	struct mlxsw_sp_fib *fib = fib_node->fib;
2207 
2208 	if (--fib->prefix_ref_count[prefix_len] == 0)
2209 		mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len);
2210 }
2211 
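/* Inserting a node may introduce a new prefix length. If the FIB is
 * already bound to an LPM tree, make sure the tree can accommodate the
 * updated prefix usage. Otherwise, get a suitable tree and bind it to
 * the virtual router first.
 */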
2212 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
2213 				  struct mlxsw_sp_fib_node *fib_node,
2214 				  struct mlxsw_sp_fib *fib)
2215 {
2216 	struct mlxsw_sp_prefix_usage req_prefix_usage;
2217 	struct mlxsw_sp_lpm_tree *lpm_tree;
2218 	int err;
2219 
2220 	err = mlxsw_sp_fib_node_insert(fib, fib_node);
2221 	if (err)
2222 		return err;
2223 	fib_node->fib = fib;
2224 
2225 	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &fib->prefix_usage);
2226 	mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
2227 
2228 	if (!mlxsw_sp_prefix_usage_none(&fib->prefix_usage)) {
2229 		err = mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, fib,
2230 						 &req_prefix_usage);
2231 		if (err)
2232 			goto err_tree_check;
2233 	} else {
2234 		lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
2235 						 fib->proto);
2236 		if (IS_ERR(lpm_tree))
2237 			return PTR_ERR(lpm_tree);
2238 		fib->lpm_tree = lpm_tree;
2239 		err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib);
2240 		if (err)
2241 			goto err_tree_bind;
2242 	}
2243 
2244 	mlxsw_sp_fib_node_prefix_inc(fib_node);
2245 
2246 	return 0;
2247 
2248 err_tree_bind:
2249 	fib->lpm_tree = NULL;
2250 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
2251 err_tree_check:
2252 	fib_node->fib = NULL;
2253 	mlxsw_sp_fib_node_remove(fib, fib_node);
2254 	return err;
2255 }
2256 
2257 static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
2258 				   struct mlxsw_sp_fib_node *fib_node)
2259 {
2260 	struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
2261 	struct mlxsw_sp_fib *fib = fib_node->fib;
2262 
2263 	mlxsw_sp_fib_node_prefix_dec(fib_node);
2264 
2265 	if (mlxsw_sp_prefix_usage_none(&fib->prefix_usage)) {
2266 		mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
2267 		fib->lpm_tree = NULL;
2268 		mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
2269 	} else {
2270 		mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, fib, &fib->prefix_usage);
2271 	}
2272 
2273 	fib_node->fib = NULL;
2274 	mlxsw_sp_fib_node_remove(fib, fib_node);
2275 }
2276 
2277 static struct mlxsw_sp_fib_node *
2278 mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp,
2279 		       const struct fib_entry_notifier_info *fen_info)
2280 {
2281 	struct mlxsw_sp_fib_node *fib_node;
2282 	struct mlxsw_sp_fib *fib;
2283 	struct mlxsw_sp_vr *vr;
2284 	int err;
2285 
2286 	vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->tb_id);
2287 	if (IS_ERR(vr))
2288 		return ERR_CAST(vr);
2289 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
2290 
2291 	fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
2292 					    sizeof(fen_info->dst),
2293 					    fen_info->dst_len);
2294 	if (fib_node)
2295 		return fib_node;
2296 
2297 	fib_node = mlxsw_sp_fib_node_create(fib, &fen_info->dst,
2298 					    sizeof(fen_info->dst),
2299 					    fen_info->dst_len);
2300 	if (!fib_node) {
2301 		err = -ENOMEM;
2302 		goto err_fib_node_create;
2303 	}
2304 
2305 	err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
2306 	if (err)
2307 		goto err_fib_node_init;
2308 
2309 	return fib_node;
2310 
2311 err_fib_node_init:
2312 	mlxsw_sp_fib_node_destroy(fib_node);
2313 err_fib_node_create:
2314 	mlxsw_sp_vr_put(vr);
2315 	return ERR_PTR(err);
2316 }
2317 
2318 static void mlxsw_sp_fib4_node_put(struct mlxsw_sp *mlxsw_sp,
2319 				   struct mlxsw_sp_fib_node *fib_node)
2320 {
2321 	struct mlxsw_sp_vr *vr = fib_node->fib->vr;
2322 
2323 	if (!list_empty(&fib_node->entry_list))
2324 		return;
2325 	mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
2326 	mlxsw_sp_fib_node_destroy(fib_node);
2327 	mlxsw_sp_vr_put(vr);
2328 }
2329 
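/* The node's entry list is kept sorted by table ID (descending), then
 * by ToS (descending) and finally by priority (ascending). Return the
 * entry before which a new entry with the given parameters should be
 * inserted, or NULL if it should be inserted last.
 */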
2330 static struct mlxsw_sp_fib_entry *
2331 mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
2332 			      const struct mlxsw_sp_fib_entry_params *params)
2333 {
2334 	struct mlxsw_sp_fib_entry *fib_entry;
2335 
2336 	list_for_each_entry(fib_entry, &fib_node->entry_list, list) {
2337 		if (fib_entry->params.tb_id > params->tb_id)
2338 			continue;
2339 		if (fib_entry->params.tb_id != params->tb_id)
2340 			break;
2341 		if (fib_entry->params.tos > params->tos)
2342 			continue;
2343 		if (fib_entry->params.prio >= params->prio ||
2344 		    fib_entry->params.tos < params->tos)
2345 			return fib_entry;
2346 	}
2347 
2348 	return NULL;
2349 }
2350 
2351 static int mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib_entry *fib_entry,
2352 					  struct mlxsw_sp_fib_entry *new_entry)
2353 {
2354 	struct mlxsw_sp_fib_node *fib_node;
2355 
2356 	if (WARN_ON(!fib_entry))
2357 		return -EINVAL;
2358 
2359 	fib_node = fib_entry->fib_node;
2360 	list_for_each_entry_from(fib_entry, &fib_node->entry_list, list) {
2361 		if (fib_entry->params.tb_id != new_entry->params.tb_id ||
2362 		    fib_entry->params.tos != new_entry->params.tos ||
2363 		    fib_entry->params.prio != new_entry->params.prio)
2364 			break;
2365 	}
2366 
2367 	list_add_tail(&new_entry->list, &fib_entry->list);
2368 	return 0;
2369 }
2370 
2371 static int
2372 mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib_node *fib_node,
2373 			       struct mlxsw_sp_fib_entry *new_entry,
2374 			       bool replace, bool append)
2375 {
2376 	struct mlxsw_sp_fib_entry *fib_entry;
2377 
2378 	fib_entry = mlxsw_sp_fib4_node_entry_find(fib_node, &new_entry->params);
2379 
2380 	if (append)
2381 		return mlxsw_sp_fib4_node_list_append(fib_entry, new_entry);
2382 	if (replace && WARN_ON(!fib_entry))
2383 		return -EINVAL;
2384 
	/* Insert the new entry before the replaced one, so that we can
	 * later remove the replaced entry.
	 */
2388 	if (fib_entry) {
2389 		list_add_tail(&new_entry->list, &fib_entry->list);
2390 	} else {
2391 		struct mlxsw_sp_fib_entry *last;
2392 
2393 		list_for_each_entry(last, &fib_node->entry_list, list) {
2394 			if (new_entry->params.tb_id > last->params.tb_id)
2395 				break;
2396 			fib_entry = last;
2397 		}
2398 
2399 		if (fib_entry)
2400 			list_add(&new_entry->list, &fib_entry->list);
2401 		else
2402 			list_add(&new_entry->list, &fib_node->entry_list);
2403 	}
2404 
2405 	return 0;
2406 }
2407 
2408 static void
2409 mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib_entry *fib_entry)
2410 {
2411 	list_del(&fib_entry->list);
2412 }
2413 
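/* Only the first entry in a node's list is reflected in the device, as
 * all entries share the same prefix. The remaining entries act as
 * backups that are promoted when the first entry is removed.
 */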
2414 static int
2415 mlxsw_sp_fib4_node_entry_add(struct mlxsw_sp *mlxsw_sp,
2416 			     const struct mlxsw_sp_fib_node *fib_node,
2417 			     struct mlxsw_sp_fib_entry *fib_entry)
2418 {
2419 	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
2420 		return 0;
2421 
2422 	/* To prevent packet loss, overwrite the previously offloaded
2423 	 * entry.
2424 	 */
2425 	if (!list_is_singular(&fib_node->entry_list)) {
2426 		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
2427 		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
2428 
2429 		mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
2430 	}
2431 
2432 	return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
2433 }
2434 
2435 static void
2436 mlxsw_sp_fib4_node_entry_del(struct mlxsw_sp *mlxsw_sp,
2437 			     const struct mlxsw_sp_fib_node *fib_node,
2438 			     struct mlxsw_sp_fib_entry *fib_entry)
2439 {
2440 	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
2441 		return;
2442 
2443 	/* Promote the next entry by overwriting the deleted entry */
2444 	if (!list_is_singular(&fib_node->entry_list)) {
2445 		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
2446 		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
2447 
2448 		mlxsw_sp_fib_entry_update(mlxsw_sp, n);
2449 		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
2450 		return;
2451 	}
2452 
2453 	mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
2454 }
2455 
2456 static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
2457 					 struct mlxsw_sp_fib_entry *fib_entry,
2458 					 bool replace, bool append)
2459 {
2460 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
2461 	int err;
2462 
2463 	err = mlxsw_sp_fib4_node_list_insert(fib_node, fib_entry, replace,
2464 					     append);
2465 	if (err)
2466 		return err;
2467 
2468 	err = mlxsw_sp_fib4_node_entry_add(mlxsw_sp, fib_node, fib_entry);
2469 	if (err)
2470 		goto err_fib4_node_entry_add;
2471 
2472 	return 0;
2473 
2474 err_fib4_node_entry_add:
2475 	mlxsw_sp_fib4_node_list_remove(fib_entry);
2476 	return err;
2477 }
2478 
2479 static void
2480 mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
2481 				struct mlxsw_sp_fib_entry *fib_entry)
2482 {
2483 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
2484 
2485 	mlxsw_sp_fib4_node_entry_del(mlxsw_sp, fib_node, fib_entry);
2486 	mlxsw_sp_fib4_node_list_remove(fib_entry);
2487 }
2488 
2489 static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
2490 					struct mlxsw_sp_fib_entry *fib_entry,
2491 					bool replace)
2492 {
2493 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
2494 	struct mlxsw_sp_fib_entry *replaced;
2495 
2496 	if (!replace)
2497 		return;
2498 
	/* We inserted the new entry before the replaced one */
2500 	replaced = list_next_entry(fib_entry, list);
2501 
2502 	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
2503 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
2504 	mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
2505 }
2506 
2507 static int
2508 mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
2509 			 const struct fib_entry_notifier_info *fen_info,
2510 			 bool replace, bool append)
2511 {
2512 	struct mlxsw_sp_fib_entry *fib_entry;
2513 	struct mlxsw_sp_fib_node *fib_node;
2514 	int err;
2515 
2516 	if (mlxsw_sp->router.aborted)
2517 		return 0;
2518 
2519 	fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info);
2520 	if (IS_ERR(fib_node)) {
2521 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
2522 		return PTR_ERR(fib_node);
2523 	}
2524 
2525 	fib_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
2526 	if (IS_ERR(fib_entry)) {
2527 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
2528 		err = PTR_ERR(fib_entry);
2529 		goto err_fib4_entry_create;
2530 	}
2531 
2532 	err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib_entry, replace,
2533 					    append);
2534 	if (err) {
2535 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
2536 		goto err_fib4_node_entry_link;
2537 	}
2538 
2539 	mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib_entry, replace);
2540 
2541 	return 0;
2542 
2543 err_fib4_node_entry_link:
2544 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
2545 err_fib4_entry_create:
2546 	mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
2547 	return err;
2548 }
2549 
2550 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
2551 				     struct fib_entry_notifier_info *fen_info)
2552 {
2553 	struct mlxsw_sp_fib_entry *fib_entry;
2554 	struct mlxsw_sp_fib_node *fib_node;
2555 
2556 	if (mlxsw_sp->router.aborted)
2557 		return;
2558 
2559 	fib_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
2560 	if (WARN_ON(!fib_entry))
2561 		return;
2562 	fib_node = fib_entry->fib_node;
2563 
2564 	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry);
2565 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
2566 	mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
2567 }
2568 
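/* After aborting FIB offload, bind all active virtual routers to the
 * minimal LPM tree and install a default route in each that traps
 * packets to the CPU, so that routing falls back to the kernel.
 */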
2569 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
2570 {
2571 	char ralta_pl[MLXSW_REG_RALTA_LEN];
2572 	char ralst_pl[MLXSW_REG_RALST_LEN];
2573 	int i, err;
2574 
2575 	mlxsw_reg_ralta_pack(ralta_pl, true, MLXSW_REG_RALXX_PROTOCOL_IPV4,
2576 			     MLXSW_SP_LPM_TREE_MIN);
2577 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
2578 	if (err)
2579 		return err;
2580 
2581 	mlxsw_reg_ralst_pack(ralst_pl, 0xff, MLXSW_SP_LPM_TREE_MIN);
2582 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
2583 	if (err)
2584 		return err;
2585 
2586 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
2587 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router.vrs[i];
2588 		char raltb_pl[MLXSW_REG_RALTB_LEN];
2589 		char ralue_pl[MLXSW_REG_RALUE_LEN];
2590 
2591 		if (!mlxsw_sp_vr_is_used(vr))
2592 			continue;
2593 
2594 		mlxsw_reg_raltb_pack(raltb_pl, vr->id,
2595 				     MLXSW_REG_RALXX_PROTOCOL_IPV4,
2596 				     MLXSW_SP_LPM_TREE_MIN);
2597 		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
2598 				      raltb_pl);
2599 		if (err)
2600 			return err;
2601 
		mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_REG_RALXX_PROTOCOL_IPV4,
				      MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0,
				      0);
2605 		mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
2606 		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
2607 				      ralue_pl);
2608 		if (err)
2609 			return err;
2610 	}
2611 
2612 	return 0;
2613 }
2614 
2615 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
2616 				     struct mlxsw_sp_fib_node *fib_node)
2617 {
2618 	struct mlxsw_sp_fib_entry *fib_entry, *tmp;
2619 
2620 	list_for_each_entry_safe(fib_entry, tmp, &fib_node->entry_list, list) {
2621 		bool do_break = &tmp->list == &fib_node->entry_list;
2622 
2623 		mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry);
2624 		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
2625 		mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
2626 		/* Break when entry list is empty and node was freed.
2627 		 * Otherwise, we'll access freed memory in the next
2628 		 * iteration.
2629 		 */
2630 		if (do_break)
2631 			break;
2632 	}
2633 }
2634 
2635 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
2636 				    struct mlxsw_sp_fib_node *fib_node)
2637 {
2638 	switch (fib_node->fib->proto) {
2639 	case MLXSW_SP_L3_PROTO_IPV4:
2640 		mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
2641 		break;
2642 	case MLXSW_SP_L3_PROTO_IPV6:
2643 		WARN_ON_ONCE(1);
2644 		break;
2645 	}
2646 }
2647 
2648 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
2649 				  struct mlxsw_sp_vr *vr,
2650 				  enum mlxsw_sp_l3proto proto)
2651 {
2652 	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
2653 	struct mlxsw_sp_fib_node *fib_node, *tmp;
2654 
2655 	list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
2656 		bool do_break = &tmp->list == &fib->node_list;
2657 
2658 		mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
2659 		if (do_break)
2660 			break;
2661 	}
2662 }
2663 
2664 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
2665 {
2666 	int i;
2667 
2668 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
2669 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router.vrs[i];
2670 
2671 		if (!mlxsw_sp_vr_is_used(vr))
2672 			continue;
2673 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
2674 	}
2675 }
2676 
2677 static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp)
2678 {
2679 	int err;
2680 
2681 	if (mlxsw_sp->router.aborted)
2682 		return;
2683 	dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
2684 	mlxsw_sp_router_fib_flush(mlxsw_sp);
2685 	mlxsw_sp->router.aborted = true;
2686 	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
2687 	if (err)
2688 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
2689 }
2690 
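/* FIB notifications are received in an atomic context, so the actual
 * processing is deferred to a work item that can take RTNL and issue
 * device commands.
 */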
2691 struct mlxsw_sp_fib_event_work {
2692 	struct work_struct work;
2693 	union {
2694 		struct fib_entry_notifier_info fen_info;
2695 		struct fib_rule_notifier_info fr_info;
2696 		struct fib_nh_notifier_info fnh_info;
2697 	};
2698 	struct mlxsw_sp *mlxsw_sp;
2699 	unsigned long event;
2700 };
2701 
2702 static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
2703 {
2704 	struct mlxsw_sp_fib_event_work *fib_work =
2705 		container_of(work, struct mlxsw_sp_fib_event_work, work);
2706 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
2707 	struct fib_rule *rule;
2708 	bool replace, append;
2709 	int err;
2710 
2711 	/* Protect internal structures from changes */
2712 	rtnl_lock();
2713 	switch (fib_work->event) {
2714 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
2715 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
2716 	case FIB_EVENT_ENTRY_ADD:
2717 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
2718 		append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
2719 		err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
2720 					       replace, append);
2721 		if (err)
2722 			mlxsw_sp_router_fib4_abort(mlxsw_sp);
2723 		fib_info_put(fib_work->fen_info.fi);
2724 		break;
2725 	case FIB_EVENT_ENTRY_DEL:
2726 		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
2727 		fib_info_put(fib_work->fen_info.fi);
2728 		break;
2729 	case FIB_EVENT_RULE_ADD: /* fall through */
2730 	case FIB_EVENT_RULE_DEL:
2731 		rule = fib_work->fr_info.rule;
2732 		if (!fib4_rule_default(rule) && !rule->l3mdev)
2733 			mlxsw_sp_router_fib4_abort(mlxsw_sp);
2734 		fib_rule_put(rule);
2735 		break;
2736 	case FIB_EVENT_NH_ADD: /* fall through */
2737 	case FIB_EVENT_NH_DEL:
2738 		mlxsw_sp_nexthop_event(mlxsw_sp, fib_work->event,
2739 				       fib_work->fnh_info.fib_nh);
2740 		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
2741 		break;
2742 	}
2743 	rtnl_unlock();
2744 	kfree(fib_work);
2745 }
2746 
2747 /* Called with rcu_read_lock() */
2748 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
2749 				     unsigned long event, void *ptr)
2750 {
2751 	struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
2752 	struct mlxsw_sp_fib_event_work *fib_work;
2753 	struct fib_notifier_info *info = ptr;
2754 
2755 	if (!net_eq(info->net, &init_net))
2756 		return NOTIFY_DONE;
2757 
2758 	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
2759 	if (WARN_ON(!fib_work))
2760 		return NOTIFY_BAD;
2761 
2762 	INIT_WORK(&fib_work->work, mlxsw_sp_router_fib_event_work);
2763 	fib_work->mlxsw_sp = mlxsw_sp;
2764 	fib_work->event = event;
2765 
2766 	switch (event) {
2767 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
2768 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
2769 	case FIB_EVENT_ENTRY_ADD: /* fall through */
2770 	case FIB_EVENT_ENTRY_DEL:
2771 		memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info));
		/* Take a reference on fib_info to prevent it from being
		 * freed while the work is queued. Release it afterwards.
		 */
2775 		fib_info_hold(fib_work->fen_info.fi);
2776 		break;
2777 	case FIB_EVENT_RULE_ADD: /* fall through */
2778 	case FIB_EVENT_RULE_DEL:
2779 		memcpy(&fib_work->fr_info, ptr, sizeof(fib_work->fr_info));
2780 		fib_rule_get(fib_work->fr_info.rule);
2781 		break;
2782 	case FIB_EVENT_NH_ADD: /* fall through */
2783 	case FIB_EVENT_NH_DEL:
2784 		memcpy(&fib_work->fnh_info, ptr, sizeof(fib_work->fnh_info));
2785 		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
2786 		break;
2787 	}
2788 
2789 	mlxsw_core_schedule_work(&fib_work->work);
2790 
2791 	return NOTIFY_DONE;
2792 }
2793 
2794 static struct mlxsw_sp_rif *
2795 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
2796 			 const struct net_device *dev)
2797 {
2798 	int i;
2799 
2800 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
2801 		if (mlxsw_sp->rifs[i] && mlxsw_sp->rifs[i]->dev == dev)
2802 			return mlxsw_sp->rifs[i];
2803 
2804 	return NULL;
2805 }
2806 
2807 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
2808 {
2809 	char ritr_pl[MLXSW_REG_RITR_LEN];
2810 	int err;
2811 
2812 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
2813 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
2814 	if (WARN_ON_ONCE(err))
2815 		return err;
2816 
2817 	mlxsw_reg_ritr_enable_set(ritr_pl, false);
2818 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
2819 }
2820 
2821 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2822 					  struct mlxsw_sp_rif *rif)
2823 {
2824 	mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
2825 	mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
2826 	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
2827 }
2828 
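/* A RIF should be configured when the first IP address is assigned to
 * a netdev and removed when the last one is deleted, unless the netdev
 * is an L3 slave, in which case the RIF's lifetime follows the VRF
 * membership instead.
 */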
2829 static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif,
2830 				       const struct in_device *in_dev,
2831 				       unsigned long event)
2832 {
2833 	switch (event) {
2834 	case NETDEV_UP:
2835 		if (!rif)
2836 			return true;
2837 		return false;
2838 	case NETDEV_DOWN:
2839 		if (rif && !in_dev->ifa_list &&
2840 		    !netif_is_l3_slave(rif->dev))
2841 			return true;
2842 		/* It is possible we already removed the RIF ourselves
2843 		 * if it was assigned to a netdev that is now a bridge
2844 		 * or LAG slave.
2845 		 */
2846 		return false;
2847 	}
2848 
2849 	return false;
2850 }
2851 
2852 #define MLXSW_SP_INVALID_INDEX_RIF 0xffff
2853 static int mlxsw_sp_avail_rif_get(struct mlxsw_sp *mlxsw_sp)
2854 {
2855 	int i;
2856 
2857 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
2858 		if (!mlxsw_sp->rifs[i])
2859 			return i;
2860 
2861 	return MLXSW_SP_INVALID_INDEX_RIF;
2862 }
2863 
2864 static void mlxsw_sp_vport_rif_sp_attr_get(struct mlxsw_sp_port *mlxsw_sp_vport,
2865 					   bool *p_lagged, u16 *p_system_port)
2866 {
2867 	u8 local_port = mlxsw_sp_vport->local_port;
2868 
2869 	*p_lagged = mlxsw_sp_vport->lagged;
2870 	*p_system_port = *p_lagged ? mlxsw_sp_vport->lag_id : local_port;
2871 }
2872 
2873 static int mlxsw_sp_vport_rif_sp_op(struct mlxsw_sp_port *mlxsw_sp_vport,
2874 				    u16 vr_id, struct net_device *l3_dev,
2875 				    u16 rif_index, bool create)
2876 {
2877 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
2878 	bool lagged = mlxsw_sp_vport->lagged;
2879 	char ritr_pl[MLXSW_REG_RITR_LEN];
2880 	u16 system_port;
2881 
2882 	mlxsw_reg_ritr_pack(ritr_pl, create, MLXSW_REG_RITR_SP_IF, rif_index,
2883 			    vr_id, l3_dev->mtu, l3_dev->dev_addr);
2884 
2885 	mlxsw_sp_vport_rif_sp_attr_get(mlxsw_sp_vport, &lagged, &system_port);
2886 	mlxsw_reg_ritr_sp_if_pack(ritr_pl, lagged, system_port,
2887 				  mlxsw_sp_vport_vid_get(mlxsw_sp_vport));
2888 
2889 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
2890 }
2891 
2892 static void mlxsw_sp_vport_rif_sp_leave(struct mlxsw_sp_port *mlxsw_sp_vport);
2893 
2894 static u16 mlxsw_sp_rif_sp_to_fid(u16 rif_index)
2895 {
2896 	return MLXSW_SP_RFID_BASE + rif_index;
2897 }
2898 
2899 static struct mlxsw_sp_fid *
2900 mlxsw_sp_rfid_alloc(u16 fid, struct net_device *l3_dev)
2901 {
2902 	struct mlxsw_sp_fid *f;
2903 
2904 	f = kzalloc(sizeof(*f), GFP_KERNEL);
2905 	if (!f)
2906 		return NULL;
2907 
2908 	f->leave = mlxsw_sp_vport_rif_sp_leave;
2909 	f->ref_count = 0;
2910 	f->dev = l3_dev;
2911 	f->fid = fid;
2912 
2913 	return f;
2914 }
2915 
2916 static struct mlxsw_sp_rif *
2917 mlxsw_sp_rif_alloc(u16 rif_index, u16 vr_id, struct net_device *l3_dev,
2918 		   struct mlxsw_sp_fid *f)
2919 {
2920 	struct mlxsw_sp_rif *rif;
2921 
2922 	rif = kzalloc(sizeof(*rif), GFP_KERNEL);
2923 	if (!rif)
2924 		return NULL;
2925 
2926 	INIT_LIST_HEAD(&rif->nexthop_list);
2927 	INIT_LIST_HEAD(&rif->neigh_list);
2928 	ether_addr_copy(rif->addr, l3_dev->dev_addr);
2929 	rif->mtu = l3_dev->mtu;
2930 	rif->vr_id = vr_id;
2931 	rif->dev = l3_dev;
2932 	rif->rif_index = rif_index;
2933 	rif->f = f;
2934 
2935 	return rif;
2936 }
2937 
2938 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
2939 {
2940 	return rif->rif_index;
2941 }
2942 
2943 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
2944 {
2945 	return rif->dev->ifindex;
2946 }
2947 
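/* Create a Sub-port RIF for a vPort: reserve a free RIF index, program
 * the router interface, install an FDB entry for the netdev's MAC in
 * the corresponding rFID and, if dpipe counters are enabled, attach an
 * egress counter.
 */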
2948 static struct mlxsw_sp_rif *
2949 mlxsw_sp_vport_rif_sp_create(struct mlxsw_sp_port *mlxsw_sp_vport,
2950 			     struct net_device *l3_dev)
2951 {
2952 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
2953 	u32 tb_id = l3mdev_fib_table(l3_dev);
2954 	struct mlxsw_sp_vr *vr;
2955 	struct mlxsw_sp_fid *f;
2956 	struct mlxsw_sp_rif *rif;
2957 	u16 fid, rif_index;
2958 	int err;
2959 
2960 	rif_index = mlxsw_sp_avail_rif_get(mlxsw_sp);
2961 	if (rif_index == MLXSW_SP_INVALID_INDEX_RIF)
2962 		return ERR_PTR(-ERANGE);
2963 
2964 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN);
2965 	if (IS_ERR(vr))
2966 		return ERR_CAST(vr);
2967 
2968 	err = mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, vr->id, l3_dev,
2969 				       rif_index, true);
2970 	if (err)
2971 		goto err_vport_rif_sp_op;
2972 
2973 	fid = mlxsw_sp_rif_sp_to_fid(rif_index);
2974 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, true);
2975 	if (err)
2976 		goto err_rif_fdb_op;
2977 
2978 	f = mlxsw_sp_rfid_alloc(fid, l3_dev);
2979 	if (!f) {
2980 		err = -ENOMEM;
2981 		goto err_rfid_alloc;
2982 	}
2983 
2984 	rif = mlxsw_sp_rif_alloc(rif_index, vr->id, l3_dev, f);
2985 	if (!rif) {
2986 		err = -ENOMEM;
2987 		goto err_rif_alloc;
2988 	}
2989 
2990 	if (devlink_dpipe_table_counter_enabled(priv_to_devlink(mlxsw_sp->core),
2991 						MLXSW_SP_DPIPE_TABLE_NAME_ERIF)) {
2992 		err = mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif,
2993 						 MLXSW_SP_RIF_COUNTER_EGRESS);
2994 		if (err)
2995 			netdev_dbg(mlxsw_sp_vport->dev,
				   "Counter allocation failed: err=%d\n", err);
2997 	}
2998 
2999 	f->rif = rif;
3000 	mlxsw_sp->rifs[rif_index] = rif;
3001 	vr->rif_count++;
3002 
3003 	return rif;
3004 
3005 err_rif_alloc:
3006 	kfree(f);
3007 err_rfid_alloc:
3008 	mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, false);
3009 err_rif_fdb_op:
3010 	mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, vr->id, l3_dev, rif_index,
3011 				 false);
3012 err_vport_rif_sp_op:
3013 	mlxsw_sp_vr_put(vr);
3014 	return ERR_PTR(err);
3015 }
3016 
3017 static void mlxsw_sp_vport_rif_sp_destroy(struct mlxsw_sp_port *mlxsw_sp_vport,
3018 					  struct mlxsw_sp_rif *rif)
3019 {
3020 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
3021 	struct mlxsw_sp_vr *vr = &mlxsw_sp->router.vrs[rif->vr_id];
3022 	struct net_device *l3_dev = rif->dev;
3023 	struct mlxsw_sp_fid *f = rif->f;
3024 	u16 rif_index = rif->rif_index;
3025 	u16 fid = f->fid;
3026 
3027 	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
3028 
3029 	mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
3030 	mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_INGRESS);
3031 
3032 	vr->rif_count--;
3033 	mlxsw_sp->rifs[rif_index] = NULL;
3034 	f->rif = NULL;
3035 
3036 	kfree(rif);
3037 
3038 	kfree(f);
3039 
3040 	mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, false);
3041 
3042 	mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, vr->id, l3_dev, rif_index,
3043 				 false);
3044 	mlxsw_sp_vr_put(vr);
3045 }
3046 
3047 static int mlxsw_sp_vport_rif_sp_join(struct mlxsw_sp_port *mlxsw_sp_vport,
3048 				      struct net_device *l3_dev)
3049 {
3050 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
3051 	struct mlxsw_sp_rif *rif;
3052 
3053 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
3054 	if (!rif) {
3055 		rif = mlxsw_sp_vport_rif_sp_create(mlxsw_sp_vport, l3_dev);
3056 		if (IS_ERR(rif))
3057 			return PTR_ERR(rif);
3058 	}
3059 
3060 	mlxsw_sp_vport_fid_set(mlxsw_sp_vport, rif->f);
3061 	rif->f->ref_count++;
3062 
3063 	netdev_dbg(mlxsw_sp_vport->dev, "Joined FID=%d\n", rif->f->fid);
3064 
3065 	return 0;
3066 }
3067 
3068 static void mlxsw_sp_vport_rif_sp_leave(struct mlxsw_sp_port *mlxsw_sp_vport)
3069 {
3070 	struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport);
3071 
3072 	netdev_dbg(mlxsw_sp_vport->dev, "Left FID=%d\n", f->fid);
3073 
3074 	mlxsw_sp_vport_fid_set(mlxsw_sp_vport, NULL);
3075 	if (--f->ref_count == 0)
3076 		mlxsw_sp_vport_rif_sp_destroy(mlxsw_sp_vport, f->rif);
3077 }
3078 
3079 static int mlxsw_sp_inetaddr_vport_event(struct net_device *l3_dev,
3080 					 struct net_device *port_dev,
3081 					 unsigned long event, u16 vid)
3082 {
3083 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
3084 	struct mlxsw_sp_port *mlxsw_sp_vport;
3085 
3086 	mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid);
3087 	if (WARN_ON(!mlxsw_sp_vport))
3088 		return -EINVAL;
3089 
3090 	switch (event) {
3091 	case NETDEV_UP:
3092 		return mlxsw_sp_vport_rif_sp_join(mlxsw_sp_vport, l3_dev);
3093 	case NETDEV_DOWN:
3094 		mlxsw_sp_vport_rif_sp_leave(mlxsw_sp_vport);
3095 		break;
3096 	}
3097 
3098 	return 0;
3099 }
3100 
3101 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
3102 					unsigned long event)
3103 {
3104 	if (netif_is_bridge_port(port_dev) ||
3105 	    netif_is_lag_port(port_dev) ||
3106 	    netif_is_ovs_port(port_dev))
3107 		return 0;
3108 
3109 	return mlxsw_sp_inetaddr_vport_event(port_dev, port_dev, event, 1);
3110 }
3111 
3112 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
3113 					 struct net_device *lag_dev,
3114 					 unsigned long event, u16 vid)
3115 {
3116 	struct net_device *port_dev;
3117 	struct list_head *iter;
3118 	int err;
3119 
3120 	netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
3121 		if (mlxsw_sp_port_dev_check(port_dev)) {
3122 			err = mlxsw_sp_inetaddr_vport_event(l3_dev, port_dev,
3123 							    event, vid);
3124 			if (err)
3125 				return err;
3126 		}
3127 	}
3128 
3129 	return 0;
3130 }
3131 
3132 static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
3133 				       unsigned long event)
3134 {
3135 	if (netif_is_bridge_port(lag_dev))
3136 		return 0;
3137 
3138 	return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1);
3139 }
3140 
3141 static struct mlxsw_sp_fid *mlxsw_sp_bridge_fid_get(struct mlxsw_sp *mlxsw_sp,
3142 						    struct net_device *l3_dev)
3143 {
3144 	u16 fid;
3145 
3146 	if (is_vlan_dev(l3_dev))
3147 		fid = vlan_dev_vlan_id(l3_dev);
3148 	else if (mlxsw_sp->master_bridge.dev == l3_dev)
3149 		fid = 1;
3150 	else
3151 		return mlxsw_sp_vfid_find(mlxsw_sp, l3_dev);
3152 
3153 	return mlxsw_sp_fid_find(mlxsw_sp, fid);
3154 }
3155 
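/* The router is represented in the flood tables by a dedicated
 * "router port", numbered one above the maximum number of ports.
 */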
3156 static u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
3157 {
3158 	return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
3159 }
3160 
3161 static enum mlxsw_flood_table_type mlxsw_sp_flood_table_type_get(u16 fid)
3162 {
3163 	return mlxsw_sp_fid_is_vfid(fid) ? MLXSW_REG_SFGC_TABLE_TYPE_FID :
3164 	       MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST;
3165 }
3166 
3167 static u16 mlxsw_sp_flood_table_index_get(u16 fid)
3168 {
3169 	return mlxsw_sp_fid_is_vfid(fid) ? mlxsw_sp_fid_to_vfid(fid) : fid;
3170 }
3171 
3172 static int mlxsw_sp_router_port_flood_set(struct mlxsw_sp *mlxsw_sp, u16 fid,
3173 					  bool set)
3174 {
3175 	u8 router_port = mlxsw_sp_router_port(mlxsw_sp);
3176 	enum mlxsw_flood_table_type table_type;
3177 	char *sftr_pl;
3178 	u16 index;
3179 	int err;
3180 
3181 	sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL);
3182 	if (!sftr_pl)
3183 		return -ENOMEM;
3184 
3185 	table_type = mlxsw_sp_flood_table_type_get(fid);
3186 	index = mlxsw_sp_flood_table_index_get(fid);
3187 	mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_BC, index, table_type,
3188 			    1, router_port, set);
3189 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl);
3190 
3191 	kfree(sftr_pl);
3192 	return err;
3193 }
3194 
3195 static enum mlxsw_reg_ritr_if_type mlxsw_sp_rif_type_get(u16 fid)
3196 {
3197 	if (mlxsw_sp_fid_is_vfid(fid))
3198 		return MLXSW_REG_RITR_FID_IF;
3199 	else
3200 		return MLXSW_REG_RITR_VLAN_IF;
3201 }
3202 
3203 static int mlxsw_sp_rif_bridge_op(struct mlxsw_sp *mlxsw_sp, u16 vr_id,
3204 				  struct net_device *l3_dev,
3205 				  u16 fid, u16 rif,
3206 				  bool create)
3207 {
3208 	enum mlxsw_reg_ritr_if_type rif_type;
3209 	char ritr_pl[MLXSW_REG_RITR_LEN];
3210 
3211 	rif_type = mlxsw_sp_rif_type_get(fid);
3212 	mlxsw_reg_ritr_pack(ritr_pl, create, rif_type, rif, vr_id, l3_dev->mtu,
3213 			    l3_dev->dev_addr);
3214 	mlxsw_reg_ritr_fid_set(ritr_pl, rif_type, fid);
3215 
3216 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
3217 }
3218 
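/* Bridge RIFs are bound to the bridge's FID. In addition to creating
 * the router interface itself, broadcast traffic in the FID must be
 * flooded towards the router port.
 */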
3219 static int mlxsw_sp_rif_bridge_create(struct mlxsw_sp *mlxsw_sp,
3220 				      struct net_device *l3_dev,
3221 				      struct mlxsw_sp_fid *f)
3222 {
3223 	u32 tb_id = l3mdev_fib_table(l3_dev);
3224 	struct mlxsw_sp_rif *rif;
3225 	struct mlxsw_sp_vr *vr;
3226 	u16 rif_index;
3227 	int err;
3228 
3229 	rif_index = mlxsw_sp_avail_rif_get(mlxsw_sp);
3230 	if (rif_index == MLXSW_SP_INVALID_INDEX_RIF)
3231 		return -ERANGE;
3232 
3233 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN);
3234 	if (IS_ERR(vr))
3235 		return PTR_ERR(vr);
3236 
3237 	err = mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, true);
3238 	if (err)
3239 		goto err_port_flood_set;
3240 
3241 	err = mlxsw_sp_rif_bridge_op(mlxsw_sp, vr->id, l3_dev, f->fid,
3242 				     rif_index, true);
3243 	if (err)
3244 		goto err_rif_bridge_op;
3245 
3246 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, true);
3247 	if (err)
3248 		goto err_rif_fdb_op;
3249 
3250 	rif = mlxsw_sp_rif_alloc(rif_index, vr->id, l3_dev, f);
3251 	if (!rif) {
3252 		err = -ENOMEM;
3253 		goto err_rif_alloc;
3254 	}
3255 
3256 	f->rif = rif;
3257 	mlxsw_sp->rifs[rif_index] = rif;
3258 	vr->rif_count++;
3259 
3260 	netdev_dbg(l3_dev, "RIF=%d created\n", rif_index);
3261 
3262 	return 0;
3263 
3264 err_rif_alloc:
3265 	mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false);
3266 err_rif_fdb_op:
3267 	mlxsw_sp_rif_bridge_op(mlxsw_sp, vr->id, l3_dev, f->fid, rif_index,
3268 			       false);
3269 err_rif_bridge_op:
3270 	mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false);
3271 err_port_flood_set:
3272 	mlxsw_sp_vr_put(vr);
3273 	return err;
3274 }
3275 
3276 void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp,
3277 				 struct mlxsw_sp_rif *rif)
3278 {
3279 	struct mlxsw_sp_vr *vr = &mlxsw_sp->router.vrs[rif->vr_id];
3280 	struct net_device *l3_dev = rif->dev;
3281 	struct mlxsw_sp_fid *f = rif->f;
3282 	u16 rif_index = rif->rif_index;
3283 
3284 	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
3285 
3286 	vr->rif_count--;
3287 	mlxsw_sp->rifs[rif_index] = NULL;
3288 	f->rif = NULL;
3289 
3290 	kfree(rif);
3291 
3292 	mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false);
3293 
3294 	mlxsw_sp_rif_bridge_op(mlxsw_sp, vr->id, l3_dev, f->fid, rif_index,
3295 			       false);
3296 
3297 	mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false);
3298 
3299 	mlxsw_sp_vr_put(vr);
3300 
3301 	netdev_dbg(l3_dev, "RIF=%d destroyed\n", rif_index);
3302 }
3303 
3304 static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
3305 					  struct net_device *br_dev,
3306 					  unsigned long event)
3307 {
3308 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
3309 	struct mlxsw_sp_fid *f;
3310 
3311 	/* FID can either be an actual FID if the L3 device is the
3312 	 * VLAN-aware bridge or a VLAN device on top. Otherwise, the
3313 	 * L3 device is a VLAN-unaware bridge and we get a vFID.
3314 	 */
3315 	f = mlxsw_sp_bridge_fid_get(mlxsw_sp, l3_dev);
3316 	if (WARN_ON(!f))
3317 		return -EINVAL;
3318 
3319 	switch (event) {
3320 	case NETDEV_UP:
3321 		return mlxsw_sp_rif_bridge_create(mlxsw_sp, l3_dev, f);
3322 	case NETDEV_DOWN:
3323 		mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->rif);
3324 		break;
3325 	}
3326 
3327 	return 0;
3328 }
3329 
3330 static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
3331 					unsigned long event)
3332 {
3333 	struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
3334 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(vlan_dev);
3335 	u16 vid = vlan_dev_vlan_id(vlan_dev);
3336 
3337 	if (mlxsw_sp_port_dev_check(real_dev))
3338 		return mlxsw_sp_inetaddr_vport_event(vlan_dev, real_dev, event,
3339 						     vid);
3340 	else if (netif_is_lag_master(real_dev))
3341 		return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
3342 						     vid);
3343 	else if (netif_is_bridge_master(real_dev) &&
3344 		 mlxsw_sp->master_bridge.dev == real_dev)
3345 		return mlxsw_sp_inetaddr_bridge_event(vlan_dev, real_dev,
3346 						      event);
3347 
3348 	return 0;
3349 }
3350 
3351 static int __mlxsw_sp_inetaddr_event(struct net_device *dev,
3352 				     unsigned long event)
3353 {
3354 	if (mlxsw_sp_port_dev_check(dev))
3355 		return mlxsw_sp_inetaddr_port_event(dev, event);
3356 	else if (netif_is_lag_master(dev))
3357 		return mlxsw_sp_inetaddr_lag_event(dev, event);
3358 	else if (netif_is_bridge_master(dev))
3359 		return mlxsw_sp_inetaddr_bridge_event(dev, dev, event);
3360 	else if (is_vlan_dev(dev))
3361 		return mlxsw_sp_inetaddr_vlan_event(dev, event);
3362 	else
3363 		return 0;
3364 }
3365 
3366 int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
3367 			    unsigned long event, void *ptr)
3368 {
3369 	struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
3370 	struct net_device *dev = ifa->ifa_dev->dev;
3371 	struct mlxsw_sp *mlxsw_sp;
3372 	struct mlxsw_sp_rif *rif;
3373 	int err = 0;
3374 
3375 	mlxsw_sp = mlxsw_sp_lower_get(dev);
3376 	if (!mlxsw_sp)
3377 		goto out;
3378 
3379 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
3380 	if (!mlxsw_sp_rif_should_config(rif, ifa->ifa_dev, event))
3381 		goto out;
3382 
3383 	err = __mlxsw_sp_inetaddr_event(dev, event);
3384 out:
3385 	return notifier_from_errno(err);
3386 }
3387 
3388 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
3389 			     const char *mac, int mtu)
3390 {
3391 	char ritr_pl[MLXSW_REG_RITR_LEN];
3392 	int err;
3393 
3394 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
3395 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
3396 	if (err)
3397 		return err;
3398 
3399 	mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
3400 	mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
3401 	mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
3402 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
3403 }
3404 
3405 int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
3406 {
3407 	struct mlxsw_sp *mlxsw_sp;
3408 	struct mlxsw_sp_rif *rif;
3409 	int err;
3410 
3411 	mlxsw_sp = mlxsw_sp_lower_get(dev);
3412 	if (!mlxsw_sp)
3413 		return 0;
3414 
3415 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
3416 	if (!rif)
3417 		return 0;
3418 
3419 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, rif->f->fid, false);
3420 	if (err)
3421 		return err;
3422 
3423 	err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
3424 				dev->mtu);
3425 	if (err)
3426 		goto err_rif_edit;
3427 
3428 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, rif->f->fid, true);
3429 	if (err)
3430 		goto err_rif_fdb_op;
3431 
3432 	ether_addr_copy(rif->addr, dev->dev_addr);
3433 	rif->mtu = dev->mtu;
3434 
3435 	netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
3436 
3437 	return 0;
3438 
3439 err_rif_fdb_op:
3440 	mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
3441 err_rif_edit:
3442 	mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, rif->f->fid, true);
3443 	return err;
3444 }
3445 
3446 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
3447 				  struct net_device *l3_dev)
3448 {
3449 	struct mlxsw_sp_rif *rif;
3450 
	/* If the netdev is already associated with a RIF, then we need to
	 * destroy it and create a new one with the new virtual router ID.
	 */
3454 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
3455 	if (rif)
3456 		__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN);
3457 
3458 	return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP);
3459 }
3460 
3461 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
3462 				    struct net_device *l3_dev)
3463 {
3464 	struct mlxsw_sp_rif *rif;
3465 
3466 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
3467 	if (!rif)
3468 		return;
3469 	__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN);
3470 }
3471 
3472 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
3473 				 struct netdev_notifier_changeupper_info *info)
3474 {
3475 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
3476 	int err = 0;
3477 
3478 	if (!mlxsw_sp)
3479 		return 0;
3480 
3481 	switch (event) {
3482 	case NETDEV_PRECHANGEUPPER:
3483 		return 0;
3484 	case NETDEV_CHANGEUPPER:
3485 		if (info->linking)
3486 			err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev);
3487 		else
3488 			mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
3489 		break;
3490 	}
3491 
3492 	return err;
3493 }
3494 
3495 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
3496 {
3497 	struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
3498 
3499 	/* Flush pending FIB notifications and then flush the device's
3500 	 * table before requesting another dump. The FIB notification
3501 	 * block is unregistered, so no need to take RTNL.
3502 	 */
3503 	mlxsw_core_flush_owq();
3504 	mlxsw_sp_router_fib_flush(mlxsw_sp);
3505 }
3506 
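/* Allocate the RIF array and enable the router via the Router General
 * Configuration Register (RGCR), capping the number of router
 * interfaces at what the device supports.
 */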
3507 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
3508 {
3509 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
3510 	u64 max_rifs;
3511 	int err;
3512 
3513 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
3514 		return -EIO;
3515 
3516 	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
3517 	mlxsw_sp->rifs = kcalloc(max_rifs, sizeof(struct mlxsw_sp_rif *),
3518 				 GFP_KERNEL);
3519 	if (!mlxsw_sp->rifs)
3520 		return -ENOMEM;
3521 
3522 	mlxsw_reg_rgcr_pack(rgcr_pl, true);
3523 	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
3524 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
3525 	if (err)
3526 		goto err_rgcr_fail;
3527 
3528 	return 0;
3529 
3530 err_rgcr_fail:
3531 	kfree(mlxsw_sp->rifs);
3532 	return err;
3533 }
3534 
3535 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
3536 {
3537 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
3538 	int i;
3539 
3540 	mlxsw_reg_rgcr_pack(rgcr_pl, false);
3541 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
3542 
3543 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
3544 		WARN_ON_ONCE(mlxsw_sp->rifs[i]);
3545 
3546 	kfree(mlxsw_sp->rifs);
3547 }
3548 
3549 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
3550 {
3551 	int err;
3552 
3553 	INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list);
3554 	err = __mlxsw_sp_router_init(mlxsw_sp);
3555 	if (err)
3556 		return err;
3557 
3558 	err = rhashtable_init(&mlxsw_sp->router.nexthop_ht,
3559 			      &mlxsw_sp_nexthop_ht_params);
3560 	if (err)
3561 		goto err_nexthop_ht_init;
3562 
3563 	err = rhashtable_init(&mlxsw_sp->router.nexthop_group_ht,
3564 			      &mlxsw_sp_nexthop_group_ht_params);
3565 	if (err)
3566 		goto err_nexthop_group_ht_init;
3567 
3568 	err = mlxsw_sp_lpm_init(mlxsw_sp);
3569 	if (err)
3570 		goto err_lpm_init;
3571 
3572 	err = mlxsw_sp_vrs_init(mlxsw_sp);
3573 	if (err)
3574 		goto err_vrs_init;
3575 
3576 	err = mlxsw_sp_neigh_init(mlxsw_sp);
3577 	if (err)
3578 		goto err_neigh_init;
3579 
3580 	mlxsw_sp->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
3581 	err = register_fib_notifier(&mlxsw_sp->fib_nb,
3582 				    mlxsw_sp_router_fib_dump_flush);
3583 	if (err)
3584 		goto err_register_fib_notifier;
3585 
3586 	return 0;
3587 
3588 err_register_fib_notifier:
3589 	mlxsw_sp_neigh_fini(mlxsw_sp);
3590 err_neigh_init:
3591 	mlxsw_sp_vrs_fini(mlxsw_sp);
3592 err_vrs_init:
3593 	mlxsw_sp_lpm_fini(mlxsw_sp);
3594 err_lpm_init:
3595 	rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht);
3596 err_nexthop_group_ht_init:
3597 	rhashtable_destroy(&mlxsw_sp->router.nexthop_ht);
3598 err_nexthop_ht_init:
3599 	__mlxsw_sp_router_fini(mlxsw_sp);
3600 	return err;
3601 }
3602 
3603 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
3604 {
3605 	unregister_fib_notifier(&mlxsw_sp->fib_nb);
3606 	mlxsw_sp_neigh_fini(mlxsw_sp);
3607 	mlxsw_sp_vrs_fini(mlxsw_sp);
3608 	mlxsw_sp_lpm_fini(mlxsw_sp);
3609 	rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht);
3610 	rhashtable_destroy(&mlxsw_sp->router.nexthop_ht);
3611 	__mlxsw_sp_router_fini(mlxsw_sp);
3612 }
3613