/*
 * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
 * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
 * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
 * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/rhashtable.h>
#include <linux/bitops.h>
#include <linux/in6.h>
#include <linux/notifier.h>
#include <linux/inetdevice.h>
#include <net/netevent.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/ip_fib.h>

#include "spectrum.h"
#include "core.h"
#include "reg.h"

/* Iterate over every prefix length whose bit is set in the usage bitmap. */
#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)

/* Return true if every prefix length used in 'prefix_usage1' is also
 * present in 'prefix_usage2'.
 */
static bool
mlxsw_sp_prefix_usage_subset(struct mlxsw_sp_prefix_usage *prefix_usage1,
			     struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	unsigned char prefix;

	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage1) {
		if (!test_bit(prefix, prefix_usage2->b))
			return false;
	}
	return true;
}

/* Return true if the two prefix-usage bitmaps are identical. */
static bool
mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
			 struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}

/* Return true if no prefix length at all is marked as used. */
static bool
mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage)
{
	struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 } };

	return mlxsw_sp_prefix_usage_eq(prefix_usage, &prefix_usage_none);
}

/* Copy 'prefix_usage2' into 'prefix_usage1'. */
static void
mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
			  struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}

static void
mlxsw_sp_prefix_usage_zero(struct mlxsw_sp_prefix_usage *prefix_usage)
{
	memset(prefix_usage, 0, sizeof(*prefix_usage));
}

/* Mark a single prefix length as used. */
static void
mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
			  unsigned char prefix_len)
{
	set_bit(prefix_len, prefix_usage->b);
}

/* Mark a single prefix length as no longer used. */
static void
mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
			    unsigned char prefix_len)
{
	clear_bit(prefix_len, prefix_usage->b);
}

/* Lookup key of a FIB node: address (sized for IPv6, the larger of the two
 * supported families) plus its prefix length.
 */
struct mlxsw_sp_fib_key {
	unsigned char addr[sizeof(struct in6_addr)];
	unsigned char prefix_len;
};

enum mlxsw_sp_fib_entry_type {
	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
};

struct mlxsw_sp_nexthop_group;

struct mlxsw_sp_fib_node {
	struct list_head entry_list;	/* entries sharing this prefix */
	struct list_head list;		/* member of fib->node_list */
	struct rhash_head ht_node;	/* member of fib->ht */
	struct mlxsw_sp_vr *vr;
	struct mlxsw_sp_fib_key key;
};

struct mlxsw_sp_fib_entry_params {
	u32 tb_id;
	u32 prio;
	u8 tos;
	u8 type;
};

struct mlxsw_sp_fib_entry {
	struct list_head list;		/* member of fib_node->entry_list */
	struct mlxsw_sp_fib_node *fib_node;
	enum mlxsw_sp_fib_entry_type type;
	struct list_head nexthop_group_node;
	struct mlxsw_sp_nexthop_group *nh_group;
	struct mlxsw_sp_fib_entry_params params;
	bool offloaded;
};

struct mlxsw_sp_fib {
	struct rhashtable ht;		/* FIB nodes keyed by mlxsw_sp_fib_key */
	struct list_head node_list;
	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
	struct mlxsw_sp_prefix_usage prefix_usage;
};

static const struct rhashtable_params mlxsw_sp_fib_ht_params;

/* Allocate and initialize a FIB instance. Returns ERR_PTR() on failure. */
static struct mlxsw_sp_fib *mlxsw_sp_fib_create(void)
{
	struct mlxsw_sp_fib *fib;
	int err;

	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
	if (!fib)
		return ERR_PTR(-ENOMEM);
	err = 
rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params); 167 if (err) 168 goto err_rhashtable_init; 169 INIT_LIST_HEAD(&fib->node_list); 170 return fib; 171 172 err_rhashtable_init: 173 kfree(fib); 174 return ERR_PTR(err); 175 } 176 177 static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib) 178 { 179 WARN_ON(!list_empty(&fib->node_list)); 180 rhashtable_destroy(&fib->ht); 181 kfree(fib); 182 } 183 184 static struct mlxsw_sp_lpm_tree * 185 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp, bool one_reserved) 186 { 187 static struct mlxsw_sp_lpm_tree *lpm_tree; 188 int i; 189 190 for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) { 191 lpm_tree = &mlxsw_sp->router.lpm_trees[i]; 192 if (lpm_tree->ref_count == 0) { 193 if (one_reserved) 194 one_reserved = false; 195 else 196 return lpm_tree; 197 } 198 } 199 return NULL; 200 } 201 202 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp, 203 struct mlxsw_sp_lpm_tree *lpm_tree) 204 { 205 char ralta_pl[MLXSW_REG_RALTA_LEN]; 206 207 mlxsw_reg_ralta_pack(ralta_pl, true, 208 (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto, 209 lpm_tree->id); 210 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); 211 } 212 213 static int mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp, 214 struct mlxsw_sp_lpm_tree *lpm_tree) 215 { 216 char ralta_pl[MLXSW_REG_RALTA_LEN]; 217 218 mlxsw_reg_ralta_pack(ralta_pl, false, 219 (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto, 220 lpm_tree->id); 221 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); 222 } 223 224 static int 225 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp, 226 struct mlxsw_sp_prefix_usage *prefix_usage, 227 struct mlxsw_sp_lpm_tree *lpm_tree) 228 { 229 char ralst_pl[MLXSW_REG_RALST_LEN]; 230 u8 root_bin = 0; 231 u8 prefix; 232 u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD; 233 234 mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) 235 root_bin = prefix; 236 237 mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id); 238 
mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
		if (prefix == 0)
			continue;
		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
					 MLXSW_REG_RALST_BIN_NO_CHILD);
		last_prefix = prefix;
	}
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
}

/* Allocate a free LPM tree slot, allocate it in hardware and program its
 * bin structure from 'prefix_usage'. Returns ERR_PTR() on failure.
 */
static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
			 struct mlxsw_sp_prefix_usage *prefix_usage,
			 enum mlxsw_sp_l3proto proto, bool one_reserved)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int err;

	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp, one_reserved);
	if (!lpm_tree)
		return ERR_PTR(-EBUSY);
	lpm_tree->proto = proto;
	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
	if (err)
		return ERR_PTR(err);

	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
						lpm_tree);
	if (err)
		goto err_left_struct_set;
	memcpy(&lpm_tree->prefix_usage, prefix_usage,
	       sizeof(lpm_tree->prefix_usage));
	return lpm_tree;

err_left_struct_set:
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
	return ERR_PTR(err);
}

static int mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_lpm_tree *lpm_tree)
{
	return mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}

/* Get a reference on an LPM tree matching 'proto' and 'prefix_usage'
 * exactly, creating a new one if no existing tree matches.
 */
static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
		      struct mlxsw_sp_prefix_usage *prefix_usage,
		      enum mlxsw_sp_l3proto proto, bool one_reserved)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int i;

	for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
		lpm_tree = &mlxsw_sp->router.lpm_trees[i];
		if (lpm_tree->ref_count != 0 &&
		    lpm_tree->proto == proto &&
		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
					     prefix_usage))
			goto inc_ref_count;
	}
	lpm_tree = mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage,
					    proto, one_reserved);
	if (IS_ERR(lpm_tree))
		return lpm_tree;

inc_ref_count:
	lpm_tree->ref_count++;
	return lpm_tree;
}

/* Drop a reference; destroy the hardware tree on the last put. */
static int mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_lpm_tree *lpm_tree)
{
	if (--lpm_tree->ref_count == 0)
		return mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
	return 0;
}

/* Assign hardware tree IDs to all LPM tree slots. */
static void mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int i;

	for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
		lpm_tree = &mlxsw_sp->router.lpm_trees[i];
		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
	}
}

/* Return the first virtual router slot not currently in use, or NULL. */
static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_vr *vr;
	int i;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router.vrs[i];
		if (!vr->used)
			return vr;
	}
	return NULL;
}

/* Bind the VR to its LPM tree via the RALTB register. */
static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_vr *vr)
{
	char raltb_pl[MLXSW_REG_RALTB_LEN];

	mlxsw_reg_raltb_pack(raltb_pl, vr->id,
			     (enum mlxsw_reg_ralxx_protocol) vr->proto,
			     vr->lpm_tree->id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}

static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_vr *vr)
{
	char raltb_pl[MLXSW_REG_RALTB_LEN];

	/* Bind to tree 0 which is default */
	mlxsw_reg_raltb_pack(raltb_pl, vr->id,
			     (enum mlxsw_reg_ralxx_protocol) vr->proto, 0);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}

static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
{
	/* For our purpose, squash main and local table into one */
	if (tb_id == RT_TABLE_LOCAL)
		tb_id = RT_TABLE_MAIN;
	return tb_id;
}

/* Find an in-use VR matching table ID and protocol, or NULL. */
static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
					    u32 tb_id,
					    enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_vr *vr;
	int i;

	tb_id = 
mlxsw_sp_fix_tb_id(tb_id);

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router.vrs[i];
		if (vr->used && vr->proto == proto && vr->tb_id == tb_id)
			return vr;
	}
	return NULL;
}

/* Claim an unused VR slot: create its FIB, get an LPM tree that covers
 * 'prefix_len' and bind the VR to it. Returns ERR_PTR() on failure.
 */
static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
					      unsigned char prefix_len,
					      u32 tb_id,
					      enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_prefix_usage req_prefix_usage;
	struct mlxsw_sp_lpm_tree *lpm_tree;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
	if (!vr)
		return ERR_PTR(-EBUSY);
	vr->fib = mlxsw_sp_fib_create();
	if (IS_ERR(vr->fib))
		return ERR_CAST(vr->fib);

	vr->proto = proto;
	vr->tb_id = tb_id;
	mlxsw_sp_prefix_usage_zero(&req_prefix_usage);
	mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 proto, true);
	if (IS_ERR(lpm_tree)) {
		err = PTR_ERR(lpm_tree);
		goto err_tree_get;
	}
	vr->lpm_tree = lpm_tree;
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
	if (err)
		goto err_tree_bind;

	vr->used = true;
	return vr;

err_tree_bind:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
err_tree_get:
	mlxsw_sp_fib_destroy(vr->fib);

	return ERR_PTR(err);
}

/* Tear down a VR: unbind its tree, drop the tree reference, free the FIB
 * and mark the slot unused again.
 */
static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_vr *vr)
{
	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr);
	mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
	mlxsw_sp_fib_destroy(vr->fib);
	vr->used = false;
}

/* Make sure the VR's LPM tree matches 'req_prefix_usage', replacing the
 * tree if necessary. Keeps the current tree if it is a superset.
 */
static int
mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
			   struct mlxsw_sp_prefix_usage *req_prefix_usage)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;

	if (mlxsw_sp_prefix_usage_eq(req_prefix_usage,
				     &vr->lpm_tree->prefix_usage))
		return 0;

	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage,
					 vr->proto, false);
	if (IS_ERR(lpm_tree)) {
		/* We failed to get a tree according to the required
		 * prefix usage. However, the current tree might be still good
		 * for us if our requirement is subset of the prefixes used
		 * in the tree.
		 */
		if (mlxsw_sp_prefix_usage_subset(req_prefix_usage,
						 &vr->lpm_tree->prefix_usage))
			return 0;
		return PTR_ERR(lpm_tree);
	}

	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr);
	mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
	vr->lpm_tree = lpm_tree;
	return mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
}

/* Look up the VR for a table/protocol, creating it if absent; for an
 * existing VR, make sure its tree also covers 'prefix_len'.
 */
static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp,
					   unsigned char prefix_len,
					   u32 tb_id,
					   enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_vr *vr;
	int err;

	tb_id = mlxsw_sp_fix_tb_id(tb_id);
	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id, proto);
	if (!vr) {
		vr = mlxsw_sp_vr_create(mlxsw_sp, prefix_len, tb_id, proto);
		if (IS_ERR(vr))
			return vr;
	} else {
		struct mlxsw_sp_prefix_usage req_prefix_usage;

		mlxsw_sp_prefix_usage_cpy(&req_prefix_usage,
					  &vr->fib->prefix_usage);
		mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
		/* Need to replace LPM tree in case new prefix is required. */
		err = mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
						 &req_prefix_usage);
		if (err)
			return ERR_PTR(err);
	}
	return vr;
}

static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
{
	/* Destroy virtual router entity in case the associated FIB is empty
	 * and allow it to be used for other tables in future. Otherwise,
	 * check if some prefix usage did not disappear and change tree if
	 * that is the case. Note that in case new, smaller tree cannot be
	 * allocated, the original one will be kept being used.
	 */
	if (mlxsw_sp_prefix_usage_none(&vr->fib->prefix_usage))
		mlxsw_sp_vr_destroy(mlxsw_sp, vr);
	else
		mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
					   &vr->fib->prefix_usage);
}

/* Allocate the VR array and assign hardware IDs. Requires the MAX_VRS
 * resource to be reported by the device.
 */
static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_vr *vr;
	u64 max_vrs;
	int i;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
		return -EIO;

	max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
	mlxsw_sp->router.vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
				       GFP_KERNEL);
	if (!mlxsw_sp->router.vrs)
		return -ENOMEM;

	for (i = 0; i < max_vrs; i++) {
		vr = &mlxsw_sp->router.vrs[i];
		vr->id = i;
	}

	return 0;
}

static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);

static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
{
	/* At this stage we're guaranteed not to have new incoming
	 * FIB notifications and the work queue is free from FIBs
	 * sitting on top of mlxsw netdevs. However, we can still
	 * have other FIBs queued. Flush the queue before flushing
	 * the device's tables. No need for locks, as we're the only
	 * writer.
	 */
	mlxsw_core_flush_owq();
	mlxsw_sp_router_fib_flush(mlxsw_sp);
	kfree(mlxsw_sp->router.vrs);
}

/* Neighbour entries are keyed by the kernel neighbour pointer itself. */
struct mlxsw_sp_neigh_key {
	struct neighbour *n;
};

struct mlxsw_sp_neigh_entry {
	struct list_head rif_list_node;	/* member of the RIF's neigh_list */
	struct rhash_head ht_node;	/* member of router.neigh_ht */
	struct mlxsw_sp_neigh_key key;
	u16 rif;
	bool connected;			/* entry is programmed in hardware */
	unsigned char ha[ETH_ALEN];
	struct list_head nexthop_list; /* list of nexthops using
					* this neigh entry
					*/
	struct list_head nexthop_neighs_list_node;
};

static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
	.key_len = sizeof(struct mlxsw_sp_neigh_key),
};

/* Allocate and minimally initialize a neighbour entry; returns NULL on
 * allocation failure.
 */
static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
			   u16 rif)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;

	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
	if (!neigh_entry)
		return NULL;

	neigh_entry->key.n = n;
	neigh_entry->rif = rif;
	INIT_LIST_HEAD(&neigh_entry->nexthop_list);

	return neigh_entry;
}

static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	kfree(neigh_entry);
}

static int
mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return rhashtable_insert_fast(&mlxsw_sp->router.neigh_ht,
				      &neigh_entry->ht_node,
				      mlxsw_sp_neigh_ht_params);
}

static void
mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	rhashtable_remove_fast(&mlxsw_sp->router.neigh_ht,
			       &neigh_entry->ht_node,
			       mlxsw_sp_neigh_ht_params);
}

/* Create a neighbour entry for 'n', link it to its RIF and insert it
 * into the hash table. Returns ERR_PTR() on failure.
 */
static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp_rif *r;
	int err;

	r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
	if (!r)
		return ERR_PTR(-EINVAL);

	neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, r->rif);
	if (!neigh_entry)
		return ERR_PTR(-ENOMEM);

	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
	if (err)
		goto err_neigh_entry_insert;

	list_add(&neigh_entry->rif_list_node, &r->neigh_list);

	return neigh_entry;

err_neigh_entry_insert:
	mlxsw_sp_neigh_entry_free(neigh_entry);
	return ERR_PTR(err);
}

static void
mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
			     struct mlxsw_sp_neigh_entry *neigh_entry)
{
	list_del(&neigh_entry->rif_list_node);
	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_free(neigh_entry);
}

static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
{
	struct mlxsw_sp_neigh_key key;

	key.n = n;
	return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht,
				      &key, mlxsw_sp_neigh_ht_params);
}

/* Seed the neighbour-activity polling interval from the ARP table's
 * DELAY_PROBE_TIME default.
 */
static void
mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
{
	unsigned long interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);

	mlxsw_sp->router.neighs_update.interval = jiffies_to_msecs(interval);
}

/* Process a single IPv4 activity entry from a RAUHTD dump: look up the
 * matching kernel neighbour and mark it used.
 */
static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int ent_index)
{
	struct net_device *dev;
	struct neighbour *n;
	__be32 dipn;
	u32 dip;
	u16 rif;

	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);

	if (!mlxsw_sp->rifs[rif]) {
		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
		return;
	}

	dipn = htonl(dip);
	dev = mlxsw_sp->rifs[rif]->dev;
	n = neigh_lookup(&arp_tbl, &dipn, dev);
	if (!n) {
		netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n",
			   &dip);
		return;
	}

	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
	neigh_event_send(n, NULL);
	neigh_release(n);
}

/* Process every IPv4 entry packed into one RAUHTD record. */
static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
	u8 num_entries;
	int i;

	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
								rec_index);
	/* Hardware starts counting at 0, so add 1. */
	num_entries++;

	/* Each record consists of several neighbour entries. */
	for (i = 0; i < num_entries; i++) {
		int ent_index;

		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
						       ent_index);
	}
}

/* Dispatch one RAUHTD record by type; IPv6 is not supported yet. */
static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
					      char *rauhtd_pl, int rec_index)
{
	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
	case MLXSW_REG_RAUHTD_TYPE_IPV4:
		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
						       rec_index);
		break;
	case MLXSW_REG_RAUHTD_TYPE_IPV6:
		WARN_ON_ONCE(1);
		break;
	}
}

/* Heuristic: return true when the dump likely filled the response buffer,
 * meaning another RAUHTD query is needed to drain remaining records.
 */
static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
{
	u8 num_rec, last_rec_index, num_entries;

	num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
	last_rec_index = num_rec - 1;

	if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
		return false;
	if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
	    MLXSW_REG_RAUHTD_TYPE_IPV6)
		return true;

	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
								last_rec_index);
	if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
		return true;
	return false;
}

/* Dump the hardware's active-neighbour table (RAUHTD) and feed the
 * activity back to the kernel neighbour subsystem.
 */
static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
{
	char *rauhtd_pl;
	u8 num_rec;
	int i, err;

rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL); 766 if (!rauhtd_pl) 767 return -ENOMEM; 768 769 /* Make sure the neighbour's netdev isn't removed in the 770 * process. 771 */ 772 rtnl_lock(); 773 do { 774 mlxsw_reg_rauhtd_pack(rauhtd_pl, MLXSW_REG_RAUHTD_TYPE_IPV4); 775 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd), 776 rauhtd_pl); 777 if (err) { 778 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour talbe\n"); 779 break; 780 } 781 num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl); 782 for (i = 0; i < num_rec; i++) 783 mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl, 784 i); 785 } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl)); 786 rtnl_unlock(); 787 788 kfree(rauhtd_pl); 789 return err; 790 } 791 792 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp) 793 { 794 struct mlxsw_sp_neigh_entry *neigh_entry; 795 796 /* Take RTNL mutex here to prevent lists from changes */ 797 rtnl_lock(); 798 list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list, 799 nexthop_neighs_list_node) 800 /* If this neigh have nexthops, make the kernel think this neigh 801 * is active regardless of the traffic. 
		 */
		neigh_event_send(neigh_entry->key.n, NULL);
	rtnl_unlock();
}

/* Re-arm the periodic neighbour-activity update work. */
static void
mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
{
	unsigned long interval = mlxsw_sp->router.neighs_update.interval;

	mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw,
			       msecs_to_jiffies(interval));
}

/* Periodic work: dump hardware activity, refresh nexthop neighbours and
 * reschedule itself.
 */
static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
{
	struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
						 router.neighs_update.dw.work);
	int err;

	err = mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp);
	if (err)
		dev_err(mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");

	mlxsw_sp_router_neighs_update_nh(mlxsw_sp);

	mlxsw_sp_router_neighs_update_work_schedule(mlxsw_sp);
}

static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
						 router.nexthop_probe_dw.work);

	/* Iterate over nexthop neighbours, find those who are unresolved and
	 * send arp on them. This solves the chicken-egg problem when
	 * the nexthop wouldn't get offloaded until the neighbor is resolved
	 * but it wouldn't get resolved ever in case traffic is flowing in HW
	 * using different nexthop.
	 *
	 * Take RTNL mutex here to prevent lists from changes.
	 */
	rtnl_lock();
	list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
			    nexthop_neighs_list_node)
		if (!neigh_entry->connected)
			neigh_event_send(neigh_entry->key.n, NULL);
	rtnl_unlock();

	mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw,
			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
}

static void
mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
			      struct mlxsw_sp_neigh_entry *neigh_entry,
			      bool removing);

/* Map add/remove to the corresponding RAUHT write operation. */
static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
{
	return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
			MLXSW_REG_RAUHT_OP_WRITE_DELETE;
}

/* Program (or remove) an IPv4 neighbour entry in hardware via RAUHT. */
static void
mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_neigh_entry *neigh_entry,
				enum mlxsw_reg_rauht_op op)
{
	struct neighbour *n = neigh_entry->key.n;
	u32 dip = ntohl(*((__be32 *) n->primary_key));
	char rauht_pl[MLXSW_REG_RAUHT_LEN];

	mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
			      dip);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
}

/* Sync the entry's connected state to hardware. Only IPv4 (ARP table)
 * neighbours are supported.
 */
static void
mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry,
			    bool adding)
{
	if (!adding && !neigh_entry->connected)
		return;
	neigh_entry->connected = adding;
	if (neigh_entry->key.n->tbl == &arp_tbl)
		mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
						mlxsw_sp_rauht_op(adding));
	else
		WARN_ON_ONCE(1);
}

/* Deferred-work context for handling a NETEVENT_NEIGH_UPDATE. */
struct mlxsw_sp_neigh_event_work {
	struct work_struct work;
	struct mlxsw_sp *mlxsw_sp;
	struct neighbour *n;	/* holds a reference; released in the work */
};

static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
{
	struct mlxsw_sp_neigh_event_work *neigh_work =
		container_of(work, struct mlxsw_sp_neigh_event_work, work);
	struct mlxsw_sp *mlxsw_sp = neigh_work->mlxsw_sp;
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct neighbour *n
= neigh_work->n; 909 unsigned char ha[ETH_ALEN]; 910 bool entry_connected; 911 u8 nud_state, dead; 912 913 /* If these parameters are changed after we release the lock, 914 * then we are guaranteed to receive another event letting us 915 * know about it. 916 */ 917 read_lock_bh(&n->lock); 918 memcpy(ha, n->ha, ETH_ALEN); 919 nud_state = n->nud_state; 920 dead = n->dead; 921 read_unlock_bh(&n->lock); 922 923 rtnl_lock(); 924 entry_connected = nud_state & NUD_VALID && !dead; 925 neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n); 926 if (!entry_connected && !neigh_entry) 927 goto out; 928 if (!neigh_entry) { 929 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n); 930 if (IS_ERR(neigh_entry)) 931 goto out; 932 } 933 934 memcpy(neigh_entry->ha, ha, ETH_ALEN); 935 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected); 936 mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected); 937 938 if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list)) 939 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry); 940 941 out: 942 rtnl_unlock(); 943 neigh_release(n); 944 kfree(neigh_work); 945 } 946 947 int mlxsw_sp_router_netevent_event(struct notifier_block *unused, 948 unsigned long event, void *ptr) 949 { 950 struct mlxsw_sp_neigh_event_work *neigh_work; 951 struct mlxsw_sp_port *mlxsw_sp_port; 952 struct mlxsw_sp *mlxsw_sp; 953 unsigned long interval; 954 struct neigh_parms *p; 955 struct neighbour *n; 956 957 switch (event) { 958 case NETEVENT_DELAY_PROBE_TIME_UPDATE: 959 p = ptr; 960 961 /* We don't care about changes in the default table. */ 962 if (!p->dev || p->tbl != &arp_tbl) 963 return NOTIFY_DONE; 964 965 /* We are in atomic context and can't take RTNL mutex, 966 * so use RCU variant to walk the device chain. 
967 */ 968 mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev); 969 if (!mlxsw_sp_port) 970 return NOTIFY_DONE; 971 972 mlxsw_sp = mlxsw_sp_port->mlxsw_sp; 973 interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME)); 974 mlxsw_sp->router.neighs_update.interval = interval; 975 976 mlxsw_sp_port_dev_put(mlxsw_sp_port); 977 break; 978 case NETEVENT_NEIGH_UPDATE: 979 n = ptr; 980 981 if (n->tbl != &arp_tbl) 982 return NOTIFY_DONE; 983 984 mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev); 985 if (!mlxsw_sp_port) 986 return NOTIFY_DONE; 987 988 neigh_work = kzalloc(sizeof(*neigh_work), GFP_ATOMIC); 989 if (!neigh_work) { 990 mlxsw_sp_port_dev_put(mlxsw_sp_port); 991 return NOTIFY_BAD; 992 } 993 994 INIT_WORK(&neigh_work->work, mlxsw_sp_router_neigh_event_work); 995 neigh_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp; 996 neigh_work->n = n; 997 998 /* Take a reference to ensure the neighbour won't be 999 * destructed until we drop the reference in delayed 1000 * work. 1001 */ 1002 neigh_clone(n); 1003 mlxsw_core_schedule_work(&neigh_work->work); 1004 mlxsw_sp_port_dev_put(mlxsw_sp_port); 1005 break; 1006 } 1007 1008 return NOTIFY_DONE; 1009 } 1010 1011 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp) 1012 { 1013 int err; 1014 1015 err = rhashtable_init(&mlxsw_sp->router.neigh_ht, 1016 &mlxsw_sp_neigh_ht_params); 1017 if (err) 1018 return err; 1019 1020 /* Initialize the polling interval according to the default 1021 * table. 
1022 */ 1023 mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp); 1024 1025 /* Create the delayed works for the activity_update */ 1026 INIT_DELAYED_WORK(&mlxsw_sp->router.neighs_update.dw, 1027 mlxsw_sp_router_neighs_update_work); 1028 INIT_DELAYED_WORK(&mlxsw_sp->router.nexthop_probe_dw, 1029 mlxsw_sp_router_probe_unresolved_nexthops); 1030 mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, 0); 1031 mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, 0); 1032 return 0; 1033 } 1034 1035 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp) 1036 { 1037 cancel_delayed_work_sync(&mlxsw_sp->router.neighs_update.dw); 1038 cancel_delayed_work_sync(&mlxsw_sp->router.nexthop_probe_dw); 1039 rhashtable_destroy(&mlxsw_sp->router.neigh_ht); 1040 } 1041 1042 static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp, 1043 const struct mlxsw_sp_rif *r) 1044 { 1045 char rauht_pl[MLXSW_REG_RAUHT_LEN]; 1046 1047 mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL, 1048 r->rif, r->addr); 1049 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl); 1050 } 1051 1052 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, 1053 struct mlxsw_sp_rif *r) 1054 { 1055 struct mlxsw_sp_neigh_entry *neigh_entry, *tmp; 1056 1057 mlxsw_sp_neigh_rif_flush(mlxsw_sp, r); 1058 list_for_each_entry_safe(neigh_entry, tmp, &r->neigh_list, 1059 rif_list_node) 1060 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry); 1061 } 1062 1063 struct mlxsw_sp_nexthop_key { 1064 struct fib_nh *fib_nh; 1065 }; 1066 1067 struct mlxsw_sp_nexthop { 1068 struct list_head neigh_list_node; /* member of neigh entry list */ 1069 struct list_head rif_list_node; 1070 struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group 1071 * this belongs to 1072 */ 1073 struct rhash_head ht_node; 1074 struct mlxsw_sp_nexthop_key key; 1075 struct mlxsw_sp_rif *r; 1076 u8 should_offload:1, /* set indicates this neigh is connected and 1077 * should be put to KVD 
			      * linear area of this group.
			      */
	   offloaded:1, /* set in case the neigh is actually put into
			 * KVD linear area of this group.
			 */
	   update:1; /* set indicates that MAC of this neigh should be
		      * updated in HW
		      */
	struct mlxsw_sp_neigh_entry *neigh_entry;
};

/* Hash table key for a nexthop group: the kernel's fib_info pointer. */
struct mlxsw_sp_nexthop_group_key {
	struct fib_info *fi;
};

struct mlxsw_sp_nexthop_group {
	struct rhash_head ht_node;
	struct list_head fib_list; /* list of fib entries that use this group */
	struct mlxsw_sp_nexthop_group_key key;
	u8 adj_index_valid:1,
	   gateway:1; /* routes using the group use a gateway */
	u32 adj_index;
	u16 ecmp_size;
	u16 count;
	struct mlxsw_sp_nexthop nexthops[0];
/* Convenience alias: the RIF of the group's first (only) nexthop. */
#define nh_rif nexthops[0].r
};

static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_nexthop_group, key),
	.head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
	.key_len = sizeof(struct mlxsw_sp_nexthop_group_key),
};

/* Register a nexthop group in the per-router group hash table. */
static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_nexthop_group *nh_grp)
{
	return rhashtable_insert_fast(&mlxsw_sp->router.nexthop_group_ht,
				      &nh_grp->ht_node,
				      mlxsw_sp_nexthop_group_ht_params);
}

static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_nexthop_group *nh_grp)
{
	rhashtable_remove_fast(&mlxsw_sp->router.nexthop_group_ht,
			       &nh_grp->ht_node,
			       mlxsw_sp_nexthop_group_ht_params);
}

/* Find an existing nexthop group by its fib_info key; NULL if absent. */
static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop_group_lookup(struct mlxsw_sp *mlxsw_sp,
			      struct mlxsw_sp_nexthop_group_key key)
{
	return rhashtable_lookup_fast(&mlxsw_sp->router.nexthop_group_ht, &key,
				      mlxsw_sp_nexthop_group_ht_params);
}

static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
	.key_offset
		    = offsetof(struct mlxsw_sp_nexthop, key),
	.head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
	.key_len = sizeof(struct mlxsw_sp_nexthop_key),
};

/* Register a single nexthop in the per-router nexthop hash table. */
static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_nexthop *nh)
{
	return rhashtable_insert_fast(&mlxsw_sp->router.nexthop_ht,
				      &nh->ht_node, mlxsw_sp_nexthop_ht_params);
}

static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_nexthop *nh)
{
	rhashtable_remove_fast(&mlxsw_sp->router.nexthop_ht, &nh->ht_node,
			       mlxsw_sp_nexthop_ht_params);
}

/* Find a nexthop by its fib_nh key; NULL if absent. */
static struct mlxsw_sp_nexthop *
mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
			struct mlxsw_sp_nexthop_key key)
{
	return rhashtable_lookup_fast(&mlxsw_sp->router.nexthop_ht, &key,
				      mlxsw_sp_nexthop_ht_params);
}

/* Issue a RALEU write: atomically repoint all routes in one virtual
 * router from the old adjacency block to the new one.
 */
static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
					     struct mlxsw_sp_vr *vr,
					     u32 adj_index, u16 ecmp_size,
					     u32 new_adj_index,
					     u16 new_ecmp_size)
{
	char raleu_pl[MLXSW_REG_RALEU_LEN];

	mlxsw_reg_raleu_pack(raleu_pl,
			     (enum mlxsw_reg_ralxx_protocol) vr->proto, vr->id,
			     adj_index, ecmp_size, new_adj_index,
			     new_ecmp_size);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
}

/* Repoint every FIB entry using this group to the group's new adjacency
 * index, issuing one RALEU per distinct virtual router on the list.
 * NOTE(review): skipping a vr equal to the previous entry's vr assumes
 * entries on fib_list are grouped by vr — confirm against callers.
 */
static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_nexthop_group *nh_grp,
					  u32 old_adj_index, u16 old_ecmp_size)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_vr *vr = NULL;
	int err;

	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
		if (vr == fib_entry->fib_node->vr)
			continue;
		vr = fib_entry->fib_node->vr;
		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr,
							old_adj_index,
							old_ecmp_size,
							nh_grp->adj_index,
							nh_grp->ecmp_size);
		if (err)
			return err;
	}
	return 0;
}

/* Write one adjacency (RATR) entry: the egress RIF and destination MAC
 * taken from the nexthop's resolved neighbour.
 */
static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
				       struct mlxsw_sp_nexthop *nh)
{
	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
	char ratr_pl[MLXSW_REG_RATR_LEN];

	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
			    true, adj_index, neigh_entry->rif);
	mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
}

/* Walk the group's nexthops and (re)write adjacency entries for the
 * offloadable ones. Only nexthops with should_offload set consume an
 * adjacency index; entries are rewritten when marked 'update' or when
 * 'reallocate' says the whole block moved.
 */
static int
mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_nexthop_group *nh_grp,
				  bool reallocate)
{
	u32 adj_index = nh_grp->adj_index; /* base */
	struct mlxsw_sp_nexthop *nh;
	int i;
	int err;

	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];

		if (!nh->should_offload) {
			nh->offloaded = 0;
			continue;
		}

		if (nh->update || reallocate) {
			err = mlxsw_sp_nexthop_mac_update(mlxsw_sp,
							  adj_index, nh);
			if (err)
				return err;
			nh->update = 0;
			nh->offloaded = 1;
		}
		adj_index++;
	}
	return 0;
}

static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_entry *fib_entry);

/* Re-program every FIB entry that uses this nexthop group. */
static int
mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	int err;

	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
		if (err)
			return err;
	}
	return 0;
}

/* Reconcile the group's device state with the current offloadability of
 * its nexthops: reallocate the KVD linear block when membership changed,
 * rewrite MACs in place when it did not, or fall back to trapping to the
 * kernel when nothing is offloadable or resources run out.
 */
static void
mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop *nh;
	bool offload_change = false;
	u32 adj_index;
	u16 ecmp_size = 0;
	bool old_adj_index_valid;
	u32
	    old_adj_index;
	u16 old_ecmp_size;
	int ret;
	int i;
	int err;

	/* Groups without a gateway use no adjacency entries; just make
	 * sure their FIB entries are programmed.
	 */
	if (!nh_grp->gateway) {
		mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
		return;
	}

	/* Detect membership changes and count offloadable nexthops. */
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];

		if (nh->should_offload ^ nh->offloaded) {
			offload_change = true;
			if (nh->should_offload)
				nh->update = 1;
		}
		if (nh->should_offload)
			ecmp_size++;
	}
	if (!offload_change) {
		/* Nothing was added or removed, so no need to reallocate. Just
		 * update MAC on existing adjacency indexes.
		 */
		err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp,
							false);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
			goto set_trap;
		}
		return;
	}
	if (!ecmp_size)
		/* No neigh of this group is connected so we just set
		 * the trap and let everything flow through kernel.
		 */
		goto set_trap;

	ret = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size);
	if (ret < 0) {
		/* We ran out of KVD linear space, just set the
		 * trap and let everything flow through kernel.
		 */
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
		goto set_trap;
	}
	adj_index = ret;
	/* Switch the group to the new block, remembering the old one so
	 * routes can be mass-migrated and the old block freed below.
	 */
	old_adj_index_valid = nh_grp->adj_index_valid;
	old_adj_index = nh_grp->adj_index;
	old_ecmp_size = nh_grp->ecmp_size;
	nh_grp->adj_index_valid = 1;
	nh_grp->adj_index = adj_index;
	nh_grp->ecmp_size = ecmp_size;
	err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp, true);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
		goto set_trap;
	}

	if (!old_adj_index_valid) {
		/* The trap was set for fib entries, so we have to call
		 * fib entry update to unset it and use adjacency index.
		 */
		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
			goto set_trap;
		}
		return;
	}

	/* Migrate routes from the old adjacency block to the new one,
	 * then release the old block regardless of the outcome.
	 */
	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
					     old_adj_index, old_ecmp_size);
	mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
		goto set_trap;
	}
	return;

set_trap:
	/* Fall back: invalidate the adjacency block, mark all nexthops
	 * as not offloaded and point the FIB entries at the trap.
	 */
	old_adj_index_valid = nh_grp->adj_index_valid;
	nh_grp->adj_index_valid = 0;
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		nh->offloaded = 0;
	}
	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
	if (err)
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
	if (old_adj_index_valid)
		mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
}

/* Flip a single nexthop's offloadability after a neighbour state change
 * and mark its adjacency entry for rewrite.
 */
static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
					    bool removing)
{
	if (!removing && !nh->should_offload)
		nh->should_offload = 1;
	else if (removing && nh->offloaded)
		nh->should_offload = 0;
	nh->update = 1;
}

/* Propagate a neighbour state change to every nexthop using it and
 * refresh each affected group.
 */
static void
mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
			      struct mlxsw_sp_neigh_entry *neigh_entry,
			      bool removing)
{
	struct mlxsw_sp_nexthop *nh;

	list_for_each_entry(nh, &neigh_entry->nexthop_list,
			    neigh_list_node) {
		__mlxsw_sp_nexthop_neigh_update(nh, removing);
		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
	}
}

/* Bind a nexthop to its egress RIF (no-op if already bound). */
static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
				      struct mlxsw_sp_rif *r)
{
	if (nh->r)
		return;

	nh->r = r;
	list_add(&nh->rif_list_node, &r->nexthop_list);
}

/* Unbind a nexthop from its RIF (no-op if not bound). */
static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
{
	if
	   (!nh->r)
		return;

	list_del(&nh->rif_list_node);
	nh->r = NULL;
}

/* Resolve and attach the neighbour backing a gateway nexthop, creating
 * the kernel neighbour and the driver neigh entry as needed. Returns 0
 * for non-gateway nexthops or if already attached.
 */
static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_nexthop *nh)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct fib_nh *fib_nh = nh->key.fib_nh;
	struct neighbour *n;
	u8 nud_state, dead;
	int err;

	if (!nh->nh_grp->gateway || nh->neigh_entry)
		return 0;

	/* Take a reference of neigh here ensuring that neigh would
	 * not be destructed before the nexthop entry is finished.
	 * The reference is taken either in neigh_lookup() or
	 * in neigh_create() in case n is not found.
	 */
	n = neigh_lookup(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev);
	if (!n) {
		n = neigh_create(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
		/* Kick off resolution of the freshly created neighbour. */
		neigh_event_send(n, NULL);
	}
	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
	if (!neigh_entry) {
		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
		if (IS_ERR(neigh_entry)) {
			err = -EINVAL;
			goto err_neigh_entry_create;
		}
	}

	/* If that is the first nexthop connected to that neigh, add to
	 * nexthop_neighs_list
	 */
	if (list_empty(&neigh_entry->nexthop_list))
		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
			      &mlxsw_sp->router.nexthop_neighs_list);

	nh->neigh_entry = neigh_entry;
	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
	/* Snapshot NUD state under the neighbour lock and seed the
	 * nexthop's offload flags accordingly.
	 */
	read_lock_bh(&n->lock);
	nud_state = n->nud_state;
	dead = n->dead;
	read_unlock_bh(&n->lock);
	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));

	return 0;

err_neigh_entry_create:
	neigh_release(n);
	return err;
}

/* Detach a nexthop from its neighbour, dropping the reference taken in
 * mlxsw_sp_nexthop_neigh_init() and destroying the neigh entry if it is
 * now unused and disconnected.
 */
static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
	struct neighbour *n;

	if (!neigh_entry)
		return;
	n = neigh_entry->key.n;

	__mlxsw_sp_nexthop_neigh_update(nh, true);
	list_del(&nh->neigh_list_node);
	nh->neigh_entry = NULL;

	/* If that is the last nexthop connected to that neigh, remove from
	 * nexthop_neighs_list
	 */
	if (list_empty(&neigh_entry->nexthop_list))
		list_del(&neigh_entry->nexthop_neighs_list_node);

	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);

	neigh_release(n);
}

/* Initialize one nexthop of a group from the kernel's fib_nh: insert it
 * into the nexthop hash, and bind RIF + neighbour when the underlying
 * device is routable (skipped for ignored linkdown nexthops or when no
 * RIF exists, in which case the nexthop simply stays non-offloaded).
 */
static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_nexthop_group *nh_grp,
				 struct mlxsw_sp_nexthop *nh,
				 struct fib_nh *fib_nh)
{
	struct net_device *dev = fib_nh->nh_dev;
	struct in_device *in_dev;
	struct mlxsw_sp_rif *r;
	int err;

	nh->nh_grp = nh_grp;
	nh->key.fib_nh = fib_nh;
	err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
	if (err)
		return err;

	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
	    fib_nh->nh_flags & RTNH_F_LINKDOWN)
		return 0;

	r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!r)
		return 0;
	mlxsw_sp_nexthop_rif_init(nh, r);

	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
	if (err)
		goto err_nexthop_neigh_init;

	return 0;

err_nexthop_neigh_init:
	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
	return err;
}

/* Reverse of mlxsw_sp_nexthop_init(). */
static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
	mlxsw_sp_nexthop_rif_fini(nh);
	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
}

/* Handle FIB_EVENT_NH_{ADD,DEL} for a single nexthop and refresh its
 * group so the device reflects the new state.
 */
static void mlxsw_sp_nexthop_event(struct mlxsw_sp *mlxsw_sp,
				   unsigned long event, struct fib_nh *fib_nh)
{
	struct mlxsw_sp_nexthop_key key;
	struct
	       mlxsw_sp_nexthop *nh;
	struct mlxsw_sp_rif *r;

	/* Once offload is aborted, stop reacting to nexthop events. */
	if (mlxsw_sp->router.aborted)
		return;

	key.fib_nh = fib_nh;
	nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
	if (WARN_ON_ONCE(!nh))
		return;

	r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, fib_nh->nh_dev);
	if (!r)
		return;

	switch (event) {
	case FIB_EVENT_NH_ADD:
		mlxsw_sp_nexthop_rif_init(nh, r);
		mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
		break;
	case FIB_EVENT_NH_DEL:
		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
		mlxsw_sp_nexthop_rif_fini(nh);
		break;
	}

	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
}

/* The RIF is going away: detach every nexthop egressing through it and
 * refresh the affected groups (they lose those adjacencies).
 */
static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
					   struct mlxsw_sp_rif *r)
{
	struct mlxsw_sp_nexthop *nh, *tmp;

	list_for_each_entry_safe(nh, tmp, &r->nexthop_list, rif_list_node) {
		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
		mlxsw_sp_nexthop_rif_fini(nh);
		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
	}
}

/* Build a nexthop group mirroring the kernel's fib_info: one embedded
 * mlxsw_sp_nexthop per fib_nh, keyed by the fib_info pointer.
 */
static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
{
	struct mlxsw_sp_nexthop_group *nh_grp;
	struct mlxsw_sp_nexthop *nh;
	struct fib_nh *fib_nh;
	size_t alloc_size;
	int i;
	int err;

	/* Nexthops are a trailing variable-length array of the group. */
	alloc_size = sizeof(*nh_grp) +
		     fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
	if (!nh_grp)
		return ERR_PTR(-ENOMEM);
	INIT_LIST_HEAD(&nh_grp->fib_list);
	/* Gateway groups need neighbour resolution; judged from the
	 * first nexthop's scope.
	 */
	nh_grp->gateway = fi->fib_nh->nh_scope == RT_SCOPE_LINK;
	nh_grp->count = fi->fib_nhs;
	nh_grp->key.fi = fi;
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		fib_nh = &fi->fib_nh[i];
		err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh);
		if (err)
			goto err_nexthop_init;
	}
	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
	if
	   (err)
		goto err_nexthop_group_insert;
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	return nh_grp;

err_nexthop_group_insert:
err_nexthop_init:
	/* Unwind only the nexthops that were successfully initialized. */
	for (i--; i >= 0; i--) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
	}
	kfree(nh_grp);
	return ERR_PTR(err);
}

/* Tear down a nexthop group; the final refresh releases the group's
 * adjacency block (checked by the WARN below).
 */
static void
mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop *nh;
	int i;

	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
	}
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	WARN_ON_ONCE(nh_grp->adj_index_valid);
	kfree(nh_grp);
}

/* Attach a FIB entry to the nexthop group for its fib_info, creating
 * the group on first use (groups are shared via the fib_list count).
 */
static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_fib_entry *fib_entry,
				      struct fib_info *fi)
{
	struct mlxsw_sp_nexthop_group_key key;
	struct mlxsw_sp_nexthop_group *nh_grp;

	key.fi = fi;
	nh_grp = mlxsw_sp_nexthop_group_lookup(mlxsw_sp, key);
	if (!nh_grp) {
		nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi);
		if (IS_ERR(nh_grp))
			return PTR_ERR(nh_grp);
	}
	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
	fib_entry->nh_group = nh_grp;
	return 0;
}

/* Detach a FIB entry from its group; destroy the group when the last
 * user goes away.
 */
static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;

	list_del(&fib_entry->nexthop_group_node);
	if (!list_empty(&nh_grp->fib_list))
		return;
	mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp);
}

/* A FIB entry can be marked offloaded only when the device can actually
 * forward for it: a valid adjacency block (remote) or a RIF (local).
 */
static bool
mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;

	/* Routes with a TOS are not offloaded. */
	if (fib_entry->params.tos)
		return false;

	switch (fib_entry->type) {
	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
		return !!nh_group->adj_index_valid;
	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
		return !!nh_group->nh_rif;
	default:
		return false;
	}
}

/* Mark the entry offloaded and bump the fib_info offload counter so the
 * kernel reports RTNH_F_OFFLOAD for the route.
 */
static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
{
	fib_entry->offloaded = true;

	switch (fib_entry->fib_node->vr->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		fib_info_offload_inc(fib_entry->nh_group->key.fi);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		/* IPv6 is not supported at this point. */
		WARN_ON_ONCE(1);
	}
}

/* Reverse of mlxsw_sp_fib_entry_offload_set(). */
static void
mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
{
	switch (fib_entry->fib_node->vr->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		fib_info_offload_dec(fib_entry->nh_group->key.fi);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		WARN_ON_ONCE(1);
	}

	fib_entry->offloaded = false;
}

/* Synchronize the entry's offload flag with the result of a RALUE
 * operation: clear it on delete, set/clear it after a successful write
 * depending on whether the entry is currently offloadable.
 */
static void
mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
				   enum mlxsw_reg_ralue_op op, int err)
{
	switch (op) {
	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
		if (!fib_entry->offloaded)
			return;
		return mlxsw_sp_fib_entry_offload_unset(fib_entry);
	case MLXSW_REG_RALUE_OP_WRITE_WRITE:
		if (err)
			return;
		if (mlxsw_sp_fib_entry_should_offload(fib_entry) &&
		    !fib_entry->offloaded)
			mlxsw_sp_fib_entry_offload_set(fib_entry);
		else if (!mlxsw_sp_fib_entry_should_offload(fib_entry) &&
			 fib_entry->offloaded)
			mlxsw_sp_fib_entry_offload_unset(fib_entry);
		return;
	default:
		return;
	}
}

/* Program a gateway route: point it at the group's adjacency block, or
 * at the trap when the group cannot forward.
 */
static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib_entry *fib_entry,
					 enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
	struct mlxsw_sp_vr *vr =
			     fib_entry->fib_node->vr;
	enum mlxsw_reg_ralue_trap_action trap_action;
	u16 trap_id = 0;
	u32 adjacency_index = 0;
	u16 ecmp_size = 0;

	/* In case the nexthop group adjacency index is valid, use it
	 * with provided ECMP size. Otherwise, setup trap and pass
	 * traffic to kernel.
	 */
	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
		adjacency_index = fib_entry->nh_group->adj_index;
		ecmp_size = fib_entry->nh_group->ecmp_size;
	} else {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
	}

	mlxsw_reg_ralue_pack4(ralue_pl,
			      (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
			      vr->id, fib_entry->fib_node->key.prefix_len,
			      *p_dip);
	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
					adjacency_index, ecmp_size);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}

/* Program a directly-connected route: forward out of the group's RIF,
 * or trap to the kernel when the RIF is unavailable.
 */
static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry,
					enum mlxsw_reg_ralue_op op)
{
	struct mlxsw_sp_rif *r = fib_entry->nh_group->nh_rif;
	enum mlxsw_reg_ralue_trap_action trap_action;
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
	struct mlxsw_sp_vr *vr = fib_entry->fib_node->vr;
	u16 trap_id = 0;
	u16 rif = 0;

	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
		rif = r->rif;
	} else {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
	}

	mlxsw_reg_ralue_pack4(ralue_pl,
			      (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
			      vr->id, fib_entry->fib_node->key.prefix_len,
			      *p_dip);
	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, rif);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
			       ralue_pl);
}

/* Program a host route (local/broadcast): deliver the packet to the CPU
 * via the ip2me action.
 */
static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry *fib_entry,
				       enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
	struct mlxsw_sp_vr *vr = fib_entry->fib_node->vr;

	mlxsw_reg_ralue_pack4(ralue_pl,
			      (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
			      vr->id, fib_entry->fib_node->key.prefix_len,
			      *p_dip);
	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}

/* Dispatch an IPv4 RALUE operation according to the entry type. */
static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_entry *fib_entry,
				  enum mlxsw_reg_ralue_op op)
{
	switch (fib_entry->type) {
	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
		return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
		return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
		return mlxsw_sp_fib_entry_op4_trap(mlxsw_sp, fib_entry, op);
	}
	return -EINVAL;
}

/* Perform a FIB entry operation for the entry's protocol and keep its
 * offload state in sync with the result. IPv6 is unsupported: -EINVAL.
 */
static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib_entry *fib_entry,
				 enum mlxsw_reg_ralue_op op)
{
	int err = -EINVAL;

	switch (fib_entry->fib_node->vr->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		err = mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		return err;
	}
	mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
	return err;
}

/* Write (create or overwrite) the entry in the device's LPM table. */
static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_entry *fib_entry)
{
	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
}

/* Remove the entry from the device's LPM table. */
static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_entry *fib_entry)
{
	return
	       mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
}

/* Classify a notified IPv4 route into the driver's entry types:
 * local/broadcast -> TRAP, link-scope unicast -> REMOTE (gateway),
 * other unicast -> LOCAL (directly connected); anything else fails.
 */
static int
mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
			     const struct fib_entry_notifier_info *fen_info,
			     struct mlxsw_sp_fib_entry *fib_entry)
{
	struct fib_info *fi = fen_info->fi;

	if (fen_info->type == RTN_LOCAL || fen_info->type == RTN_BROADCAST) {
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
		return 0;
	}
	if (fen_info->type != RTN_UNICAST)
		return -EINVAL;
	if (fi->fib_nh->nh_scope != RT_SCOPE_LINK)
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
	else
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
	return 0;
}

/* Allocate a driver FIB entry for a notified IPv4 route: classify it,
 * attach it to a (possibly shared) nexthop group, and record the route
 * parameters used for lookup/ordering. Does not program the device.
 */
static struct mlxsw_sp_fib_entry *
mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
			   struct mlxsw_sp_fib_node *fib_node,
			   const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	int err;

	fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL);
	if (!fib_entry) {
		err = -ENOMEM;
		goto err_fib_entry_alloc;
	}

	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
	if (err)
		goto err_fib4_entry_type_set;

	err = mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fen_info->fi);
	if (err)
		goto err_nexthop_group_get;

	fib_entry->params.prio = fen_info->fi->fib_priority;
	fib_entry->params.tb_id = fen_info->tb_id;
	fib_entry->params.type = fen_info->type;
	fib_entry->params.tos = fen_info->tos;

	fib_entry->fib_node = fib_node;

	return fib_entry;

err_nexthop_group_get:
err_fib4_entry_type_set:
	kfree(fib_entry);
err_fib_entry_alloc:
	return ERR_PTR(err);
}

/* Reverse of mlxsw_sp_fib4_entry_create(). */
static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry)
{
	mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry);
	kfree(fib_entry);
}

static struct mlxsw_sp_fib_node *
mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp,
		       const struct fib_entry_notifier_info *fen_info);

/* Find the driver FIB entry matching a notified route: same prefix node
 * and identical table, TOS, type and fib_info. NULL if none.
 */
static struct mlxsw_sp_fib_entry *
mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
			   const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_fib_node *fib_node;

	fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info);
	if (IS_ERR(fib_node))
		return NULL;

	list_for_each_entry(fib_entry, &fib_node->entry_list, list) {
		if (fib_entry->params.tb_id == fen_info->tb_id &&
		    fib_entry->params.tos == fen_info->tos &&
		    fib_entry->params.type == fen_info->type &&
		    fib_entry->nh_group->key.fi == fen_info->fi) {
			return fib_entry;
		}
	}

	return NULL;
}

static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_fib_node, key),
	.head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
	.key_len = sizeof(struct mlxsw_sp_fib_key),
	.automatic_shrinking = true,
};

/* Register a prefix node in the FIB's hash table. */
static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
				    struct mlxsw_sp_fib_node *fib_node)
{
	return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
				      mlxsw_sp_fib_ht_params);
}

static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
				     struct mlxsw_sp_fib_node *fib_node)
{
	rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
			       mlxsw_sp_fib_ht_params);
}

/* Look up a prefix node by address + prefix length. The key is zeroed
 * first so the unused tail of the fixed-size address field compares
 * equal.
 */
static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
			 size_t addr_len, unsigned char prefix_len)
{
	struct mlxsw_sp_fib_key key;

	memset(&key, 0, sizeof(key));
	memcpy(key.addr, addr, addr_len);
	key.prefix_len = prefix_len;
	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
}
/* Allocate a prefix node, link it on the virtual router's node list and
 * insert it into the FIB hash table. Returns NULL on allocation failure.
 * NOTE(review): the mlxsw_sp_fib_node_insert() return value is ignored
 * here — confirm whether an insert failure can occur on this path.
 */
static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_create(struct mlxsw_sp_vr *vr, const void *addr,
			 size_t addr_len, unsigned char prefix_len)
{
	struct mlxsw_sp_fib_node *fib_node;

	fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
	if (!fib_node)
		return NULL;

	INIT_LIST_HEAD(&fib_node->entry_list);
	list_add(&fib_node->list, &vr->fib->node_list);
	memcpy(fib_node->key.addr, addr, addr_len);
	fib_node->key.prefix_len = prefix_len;
	mlxsw_sp_fib_node_insert(vr->fib, fib_node);
	fib_node->vr = vr;

	return fib_node;
}

/* Reverse of mlxsw_sp_fib_node_create(); the node must be empty. */
static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
{
	mlxsw_sp_fib_node_remove(fib_node->vr->fib, fib_node);
	list_del(&fib_node->list);
	WARN_ON(!list_empty(&fib_node->entry_list));
	kfree(fib_node);
}

/* Is this entry the head of the node's entry list (i.e. the one that is
 * actually programmed in the device for this prefix)?
 */
static bool
mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
				 const struct mlxsw_sp_fib_entry *fib_entry)
{
	return list_first_entry(&fib_node->entry_list,
				struct mlxsw_sp_fib_entry, list) == fib_entry;
}

/* Reference-count the node's prefix length; the first user of a length
 * adds it to the FIB's prefix usage set.
 */
static void mlxsw_sp_fib_node_prefix_inc(struct mlxsw_sp_fib_node *fib_node)
{
	unsigned char prefix_len = fib_node->key.prefix_len;
	struct mlxsw_sp_fib *fib = fib_node->vr->fib;

	if (fib->prefix_ref_count[prefix_len]++ == 0)
		mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len);
}

/* Reverse of mlxsw_sp_fib_node_prefix_inc(). */
static void mlxsw_sp_fib_node_prefix_dec(struct mlxsw_sp_fib_node *fib_node)
{
	unsigned char prefix_len = fib_node->key.prefix_len;
	struct mlxsw_sp_fib *fib = fib_node->vr->fib;

	if (--fib->prefix_ref_count[prefix_len] == 0)
		mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len);
}

/* Get (or create) the prefix node for a notified route, taking a
 * reference on the owning virtual router.
 */
static struct mlxsw_sp_fib_node *
mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp,
		       const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib_node
				 *fib_node;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->dst_len, fen_info->tb_id,
			     MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(vr))
		return ERR_CAST(vr);

	fib_node = mlxsw_sp_fib_node_lookup(vr->fib, &fen_info->dst,
					    sizeof(fen_info->dst),
					    fen_info->dst_len);
	if (fib_node)
		return fib_node;

	fib_node = mlxsw_sp_fib_node_create(vr, &fen_info->dst,
					    sizeof(fen_info->dst),
					    fen_info->dst_len);
	if (!fib_node) {
		err = -ENOMEM;
		goto err_fib_node_create;
	}

	return fib_node;

err_fib_node_create:
	mlxsw_sp_vr_put(mlxsw_sp, vr);
	return ERR_PTR(err);
}

/* Drop the node's virtual-router reference and destroy the node once it
 * holds no more entries.
 */
static void mlxsw_sp_fib4_node_put(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_vr *vr = fib_node->vr;

	if (!list_empty(&fib_node->entry_list))
		return;
	mlxsw_sp_fib_node_destroy(fib_node);
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}

/* Find the insertion point for new params in the node's entry list,
 * which is kept ordered by (tb_id desc, tos desc, prio asc). Returns the
 * first entry the new one should precede, or NULL to append at a
 * suitable position.
 */
static struct mlxsw_sp_fib_entry *
mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
			      const struct mlxsw_sp_fib_entry_params *params)
{
	struct mlxsw_sp_fib_entry *fib_entry;

	list_for_each_entry(fib_entry, &fib_node->entry_list, list) {
		if (fib_entry->params.tb_id > params->tb_id)
			continue;
		if (fib_entry->params.tb_id != params->tb_id)
			break;
		if (fib_entry->params.tos > params->tos)
			continue;
		if (fib_entry->params.prio >= params->prio ||
		    fib_entry->params.tos < params->tos)
			return fib_entry;
	}

	return NULL;
}

/* Append a sibling route after the block of entries that share the same
 * (tb_id, tos, prio) as the new entry, starting from 'fib_entry'.
 */
static int mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib_entry *fib_entry,
					  struct mlxsw_sp_fib_entry *new_entry)
{
	struct mlxsw_sp_fib_node *fib_node;

	if (WARN_ON(!fib_entry))
		return -EINVAL;

	fib_node = fib_entry->fib_node;
	list_for_each_entry_from(fib_entry, &fib_node->entry_list, list)
	{
		if (fib_entry->params.tb_id != new_entry->params.tb_id ||
		    fib_entry->params.tos != new_entry->params.tos ||
		    fib_entry->params.prio != new_entry->params.prio)
			break;
	}

	list_add_tail(&new_entry->list, &fib_entry->list);
	return 0;
}

/* Insert a new entry into the node's ordered entry list. 'append' adds
 * it after its identical-params siblings; 'replace' requires a matching
 * entry to exist; otherwise the entry is placed before the found
 * position or, failing that, after the last entry with a higher tb_id.
 */
static int
mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib_node *fib_node,
			       struct mlxsw_sp_fib_entry *new_entry,
			       bool replace, bool append)
{
	struct mlxsw_sp_fib_entry *fib_entry;

	fib_entry = mlxsw_sp_fib4_node_entry_find(fib_node, &new_entry->params);

	if (append)
		return mlxsw_sp_fib4_node_list_append(fib_entry, new_entry);
	if (replace && WARN_ON(!fib_entry))
		return -EINVAL;

	/* Insert new entry before replaced one, so that we can later
	 * remove the second.
	 */
	if (fib_entry) {
		list_add_tail(&new_entry->list, &fib_entry->list);
	} else {
		struct mlxsw_sp_fib_entry *last;

		list_for_each_entry(last, &fib_node->entry_list, list) {
			if (new_entry->params.tb_id > last->params.tb_id)
				break;
			fib_entry = last;
		}

		if (fib_entry)
			list_add(&new_entry->list, &fib_entry->list);
		else
			list_add(&new_entry->list, &fib_node->entry_list);
	}

	return 0;
}

/* Reverse of mlxsw_sp_fib4_node_list_insert(). */
static void
mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib_entry *fib_entry)
{
	list_del(&fib_entry->list);
}

/* Program the entry in the device, but only if it is the node's head
 * entry (lower-priority entries stay software-only).
 */
static int
mlxsw_sp_fib4_node_entry_add(struct mlxsw_sp *mlxsw_sp,
			     const struct mlxsw_sp_fib_node *fib_node,
			     struct mlxsw_sp_fib_entry *fib_entry)
{
	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
		return 0;

	/* To prevent packet loss, overwrite the previously offloaded
	 * entry.
2181 */ 2182 if (!list_is_singular(&fib_node->entry_list)) { 2183 enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE; 2184 struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list); 2185 2186 mlxsw_sp_fib_entry_offload_refresh(n, op, 0); 2187 } 2188 2189 return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); 2190 } 2191 2192 static void 2193 mlxsw_sp_fib4_node_entry_del(struct mlxsw_sp *mlxsw_sp, 2194 const struct mlxsw_sp_fib_node *fib_node, 2195 struct mlxsw_sp_fib_entry *fib_entry) 2196 { 2197 if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry)) 2198 return; 2199 2200 /* Promote the next entry by overwriting the deleted entry */ 2201 if (!list_is_singular(&fib_node->entry_list)) { 2202 struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list); 2203 enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE; 2204 2205 mlxsw_sp_fib_entry_update(mlxsw_sp, n); 2206 mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0); 2207 return; 2208 } 2209 2210 mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry); 2211 } 2212 2213 static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp, 2214 struct mlxsw_sp_fib_entry *fib_entry, 2215 bool replace, bool append) 2216 { 2217 struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; 2218 int err; 2219 2220 err = mlxsw_sp_fib4_node_list_insert(fib_node, fib_entry, replace, 2221 append); 2222 if (err) 2223 return err; 2224 2225 err = mlxsw_sp_fib4_node_entry_add(mlxsw_sp, fib_node, fib_entry); 2226 if (err) 2227 goto err_fib4_node_entry_add; 2228 2229 mlxsw_sp_fib_node_prefix_inc(fib_node); 2230 2231 return 0; 2232 2233 err_fib4_node_entry_add: 2234 mlxsw_sp_fib4_node_list_remove(fib_entry); 2235 return err; 2236 } 2237 2238 static void 2239 mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, 2240 struct mlxsw_sp_fib_entry *fib_entry) 2241 { 2242 struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; 2243 2244 mlxsw_sp_fib_node_prefix_dec(fib_node); 2245 mlxsw_sp_fib4_node_entry_del(mlxsw_sp, 
fib_node, fib_entry); 2246 mlxsw_sp_fib4_node_list_remove(fib_entry); 2247 } 2248 2249 static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp, 2250 struct mlxsw_sp_fib_entry *fib_entry, 2251 bool replace) 2252 { 2253 struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; 2254 struct mlxsw_sp_fib_entry *replaced; 2255 2256 if (!replace) 2257 return; 2258 2259 /* We inserted the new entry before replaced one */ 2260 replaced = list_next_entry(fib_entry, list); 2261 2262 mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced); 2263 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced); 2264 mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node); 2265 } 2266 2267 static int 2268 mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp, 2269 const struct fib_entry_notifier_info *fen_info, 2270 bool replace, bool append) 2271 { 2272 struct mlxsw_sp_fib_entry *fib_entry; 2273 struct mlxsw_sp_fib_node *fib_node; 2274 int err; 2275 2276 if (mlxsw_sp->router.aborted) 2277 return 0; 2278 2279 fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info); 2280 if (IS_ERR(fib_node)) { 2281 dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n"); 2282 return PTR_ERR(fib_node); 2283 } 2284 2285 fib_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info); 2286 if (IS_ERR(fib_entry)) { 2287 dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n"); 2288 err = PTR_ERR(fib_entry); 2289 goto err_fib4_entry_create; 2290 } 2291 2292 err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib_entry, replace, 2293 append); 2294 if (err) { 2295 dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n"); 2296 goto err_fib4_node_entry_link; 2297 } 2298 2299 mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib_entry, replace); 2300 2301 return 0; 2302 2303 err_fib4_node_entry_link: 2304 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry); 2305 err_fib4_entry_create: 2306 mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node); 2307 return err; 2308 } 2309 2310 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp 
*mlxsw_sp,
				     struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_fib_node *fib_node;

	if (mlxsw_sp->router.aborted)
		return;

	fib_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
	if (WARN_ON(!fib_entry))
		return;
	fib_node = fib_entry->fib_node;

	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry);
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
	mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
}

/* Program a minimal catch-all: bind the minimal LPM tree to virtual
 * router 0 and install a 0/0 IPv4 route whose action is ip2me, so
 * traffic is handed to the host CPU after offload is aborted.
 */
static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];
	char ralst_pl[MLXSW_REG_RALST_LEN];
	char raltb_pl[MLXSW_REG_RALTB_LEN];
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	int err;

	mlxsw_reg_ralta_pack(ralta_pl, true, MLXSW_REG_RALXX_PROTOCOL_IPV4,
			     MLXSW_SP_LPM_TREE_MIN);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
	if (err)
		return err;

	mlxsw_reg_ralst_pack(ralst_pl, 0xff, MLXSW_SP_LPM_TREE_MIN);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
	if (err)
		return err;

	mlxsw_reg_raltb_pack(raltb_pl, 0, MLXSW_REG_RALXX_PROTOCOL_IPV4,
			     MLXSW_SP_LPM_TREE_MIN);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
	if (err)
		return err;

	mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4,
			      MLXSW_REG_RALUE_OP_WRITE_WRITE, 0, 0, 0);
	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}

/* Tear down every entry of a FIB node. Unlinking the last entry frees
 * the node itself via mlxsw_sp_fib4_node_put(), hence the do_break
 * dance to avoid touching freed memory.
 */
static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_fib_entry *fib_entry, *tmp;

	list_for_each_entry_safe(fib_entry, tmp, &fib_node->entry_list, list) {
		bool do_break = &tmp->list == &fib_node->entry_list;

		mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry);
		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
		mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
		/* Break when entry list is empty and node was freed.
		 * Otherwise, we'll access freed memory in the next
		 * iteration.
		 */
		if (do_break)
			break;
	}
}

/* Protocol dispatch for node flushing; IPv6 is not supported here. */
static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_fib_node *fib_node)
{
	switch (fib_node->vr->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		WARN_ON_ONCE(1);
		break;
	}
}

/* Flush every FIB node of every in-use virtual router. Uses the same
 * do_break pattern as node flushing: the node may free itself (and
 * drop off vr->fib->node_list) while being flushed.
 */
static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_fib_node *fib_node, *tmp;
	struct mlxsw_sp_vr *vr;
	int i;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router.vrs[i];

		if (!vr->used)
			continue;

		list_for_each_entry_safe(fib_node, tmp, &vr->fib->node_list,
					 list) {
			bool do_break = &tmp->list == &vr->fib->node_list;

			mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
			if (do_break)
				break;
		}
	}
}

/* Give up on FIB offload: flush everything from the device, mark the
 * router as aborted (so later FIB events are ignored) and install the
 * trap route that punts all traffic to the CPU. Idempotent.
 */
static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp)
{
	int err;

	if (mlxsw_sp->router.aborted)
		return;
	dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
	mlxsw_sp_router_fib_flush(mlxsw_sp);
	mlxsw_sp->router.aborted = true;
	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
	if (err)
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
}

/* Read-modify-write the RITR register to disable a router interface. */
static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
{
	char ritr_pl[MLXSW_REG_RITR_LEN];
	int err;

	mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
	if (WARN_ON_ONCE(err))
		return err;

	mlxsw_reg_ritr_enable_set(ritr_pl, false);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}

/* Called when a RIF disappears: disable it in hardware and let the
 * nexthop and neighbour code drop their references to it.
 */
void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_rif *r)
{
	mlxsw_sp_router_rif_disable(mlxsw_sp, r->rif);
	mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, r);
	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, r);
}

/* Allocate the RIF table and enable the router via RGCR. The RIF
 * array is freed again if the register write fails.
 */
static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
	char rgcr_pl[MLXSW_REG_RGCR_LEN];
	u64 max_rifs;
	int err;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
		return -EIO;

	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
	mlxsw_sp->rifs = kcalloc(max_rifs, sizeof(struct mlxsw_sp_rif *),
				 GFP_KERNEL);
	if (!mlxsw_sp->rifs)
		return -ENOMEM;

	mlxsw_reg_rgcr_pack(rgcr_pl, true);
	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
	if (err)
		goto err_rgcr_fail;

	return 0;

err_rgcr_fail:
	kfree(mlxsw_sp->rifs);
	return err;
}

/* Disable the router via RGCR and release the RIF table; warns if any
 * RIF is still registered at teardown time.
 */
static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	char rgcr_pl[MLXSW_REG_RGCR_LEN];
	int i;

	mlxsw_reg_rgcr_pack(rgcr_pl, false);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
		WARN_ON_ONCE(mlxsw_sp->rifs[i]);

	kfree(mlxsw_sp->rifs);
}

/* Deferred-work context for one FIB notification. The notifier chain
 * runs in atomic context (rcu_read_lock), so the event payload is
 * copied here and processed later from a workqueue under RTNL.
 */
struct mlxsw_sp_fib_event_work {
	struct work_struct work;
	union {
		struct fib_entry_notifier_info fen_info;
		struct fib_nh_notifier_info fnh_info;
	};
	struct mlxsw_sp *mlxsw_sp;
	unsigned long event;
};

/* Workqueue handler: apply one queued FIB event to the device. Any
 * failure to add a route triggers a full offload abort. Drops the
 * fib_info reference taken when the event was queued, then frees the
 * work item.
 */
static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
{
	struct mlxsw_sp_fib_event_work *fib_work =
		container_of(work, struct mlxsw_sp_fib_event_work, work);
	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
	bool replace, append;
	int err;

	/* Protect internal structures from changes */
	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_APPEND: /* fall through */
	case FIB_EVENT_ENTRY_ADD:
		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
		append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
		err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
					       replace, append);
		if (err)
			mlxsw_sp_router_fib4_abort(mlxsw_sp);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_ENTRY_DEL:
		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_RULE_ADD: /* fall through */
	case FIB_EVENT_RULE_DEL:
		/* Policy routing rules are not offloaded; abort. */
		mlxsw_sp_router_fib4_abort(mlxsw_sp);
		break;
	case FIB_EVENT_NH_ADD: /* fall through */
	case FIB_EVENT_NH_DEL:
		mlxsw_sp_nexthop_event(mlxsw_sp, fib_work->event,
				       fib_work->fnh_info.fib_nh);
		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	}
	rtnl_unlock();
	kfree(fib_work);
}

/* Called with rcu_read_lock() */
static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
				     unsigned long event, void *ptr)
{
	struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
	struct mlxsw_sp_fib_event_work *fib_work;
	struct fib_notifier_info *info = ptr;

	/* Only the initial network namespace is offloaded. */
	if (!net_eq(info->net, &init_net))
		return NOTIFY_DONE;

	/* Atomic context: GFP_ATOMIC, and the real work is deferred. */
	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
	if (WARN_ON(!fib_work))
		return NOTIFY_BAD;

	INIT_WORK(&fib_work->work, mlxsw_sp_router_fib_event_work);
	fib_work->mlxsw_sp = mlxsw_sp;
	fib_work->event = event;

	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_APPEND: /* fall through */
	case FIB_EVENT_ENTRY_ADD: /* fall through */
	case FIB_EVENT_ENTRY_DEL:
		memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info));
		/* Take referece on fib_info to prevent it from being
		 * freed while work is queued. Release it afterwards.
		 */
		fib_info_hold(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD: /* fall through */
	case FIB_EVENT_NH_DEL:
		memcpy(&fib_work->fnh_info, ptr, sizeof(fib_work->fnh_info));
		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	}

	mlxsw_core_schedule_work(&fib_work->work);

	return NOTIFY_DONE;
}

static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
{
	struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);

	/* Flush pending FIB notifications and then flush the device's
	 * table before requesting another dump. The FIB notification
	 * block is unregistered, so no need to take RTNL.
	 */
	mlxsw_core_flush_owq();
	mlxsw_sp_router_fib_flush(mlxsw_sp);
}

/* Bring up the router: base registers and RIF table, nexthop hash
 * tables, LPM trees, virtual routers, neighbour handling, and finally
 * the FIB notifier (registered last, once everything it may invoke is
 * ready). Unwinds in reverse order on failure.
 */
int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
	int err;

	INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list);
	err = __mlxsw_sp_router_init(mlxsw_sp);
	if (err)
		return err;

	err = rhashtable_init(&mlxsw_sp->router.nexthop_ht,
			      &mlxsw_sp_nexthop_ht_params);
	if (err)
		goto err_nexthop_ht_init;

	err = rhashtable_init(&mlxsw_sp->router.nexthop_group_ht,
			      &mlxsw_sp_nexthop_group_ht_params);
	if (err)
		goto err_nexthop_group_ht_init;

	mlxsw_sp_lpm_init(mlxsw_sp);
	err = mlxsw_sp_vrs_init(mlxsw_sp);
	if (err)
		goto err_vrs_init;

	err = mlxsw_sp_neigh_init(mlxsw_sp);
	if (err)
		goto err_neigh_init;

	mlxsw_sp->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
	err = register_fib_notifier(&mlxsw_sp->fib_nb,
				    mlxsw_sp_router_fib_dump_flush);
	if (err)
		goto err_register_fib_notifier;

	return 0;

err_register_fib_notifier:
	mlxsw_sp_neigh_fini(mlxsw_sp);
err_neigh_init:
	mlxsw_sp_vrs_fini(mlxsw_sp);
err_vrs_init:
	rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht);
err_nexthop_group_ht_init:
	rhashtable_destroy(&mlxsw_sp->router.nexthop_ht);
err_nexthop_ht_init:
	__mlxsw_sp_router_fini(mlxsw_sp);
	return err;
}

/* Tear down the router in exact reverse order of mlxsw_sp_router_init(). */
void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	unregister_fib_notifier(&mlxsw_sp->fib_nb);
	mlxsw_sp_neigh_fini(mlxsw_sp);
	mlxsw_sp_vrs_fini(mlxsw_sp);
	rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht);
	rhashtable_destroy(&mlxsw_sp->router.nexthop_ht);
	__mlxsw_sp_router_fini(mlxsw_sp);
}