/*
 * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
 * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
 * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
 * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/rhashtable.h>
#include <linux/bitops.h>
#include <linux/in6.h>
#include <linux/notifier.h>
#include <linux/inetdevice.h>
#include <net/netevent.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/ip_fib.h>

#include "spectrum.h"
#include "core.h"
#include "reg.h"

#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
        for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)

static bool
mlxsw_sp_prefix_usage_subset(struct mlxsw_sp_prefix_usage *prefix_usage1,
                             struct mlxsw_sp_prefix_usage *prefix_usage2)
{
        unsigned char prefix;

        mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage1) {
                if (!test_bit(prefix, prefix_usage2->b))
                        return false;
        }
        return true;
}

static bool
mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
                         struct mlxsw_sp_prefix_usage *prefix_usage2)
{
        return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}

static bool
mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage)
{
        struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 }};

        return mlxsw_sp_prefix_usage_eq(prefix_usage, &prefix_usage_none);
}

static void
mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
                          struct mlxsw_sp_prefix_usage *prefix_usage2)
{
        memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}

static void
mlxsw_sp_prefix_usage_zero(struct mlxsw_sp_prefix_usage *prefix_usage)
{
        memset(prefix_usage, 0, sizeof(*prefix_usage));
}

static void
mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
                          unsigned char prefix_len)
{
        set_bit(prefix_len, prefix_usage->b);
}

static void
mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
                            unsigned char prefix_len)
{
        clear_bit(prefix_len, prefix_usage->b);
}

struct mlxsw_sp_fib_key {
        unsigned char addr[sizeof(struct in6_addr)];
        unsigned char prefix_len;
};

enum mlxsw_sp_fib_entry_type {
        MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
        MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
        MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
};

struct mlxsw_sp_nexthop_group;

struct mlxsw_sp_fib_node {
        struct list_head entry_list;
        struct list_head list;
        struct rhash_head ht_node;
        struct mlxsw_sp_vr *vr;
        struct mlxsw_sp_fib_key key;
};

struct mlxsw_sp_fib_entry_params {
        u32 tb_id;
        u32 prio;
        u8 tos;
        u8 type;
};

struct mlxsw_sp_fib_entry {
        struct list_head list;
        struct mlxsw_sp_fib_node *fib_node;
        enum mlxsw_sp_fib_entry_type type;
        struct list_head nexthop_group_node;
        struct mlxsw_sp_nexthop_group *nh_group;
        struct mlxsw_sp_fib_entry_params params;
        bool offloaded;
};

struct mlxsw_sp_fib {
        struct rhashtable ht;
        struct list_head node_list;
        unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
        struct mlxsw_sp_prefix_usage prefix_usage;
};

static const struct rhashtable_params mlxsw_sp_fib_ht_params;

static struct mlxsw_sp_fib *mlxsw_sp_fib_create(void)
{
        struct mlxsw_sp_fib *fib;
        int err;

        fib = kzalloc(sizeof(*fib), GFP_KERNEL);
        if (!fib)
                return ERR_PTR(-ENOMEM);
        err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
        if (err)
                goto err_rhashtable_init;
        INIT_LIST_HEAD(&fib->node_list);
        return fib;

err_rhashtable_init:
        kfree(fib);
        return ERR_PTR(err);
}

static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib)
{
        WARN_ON(!list_empty(&fib->node_list));
        rhashtable_destroy(&fib->ht);
        kfree(fib);
}
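
/* LPM trees are a limited hardware resource. They are shared between
 * virtual routers: a tree describes which prefix lengths are in use, and
 * any router whose FIB has the same prefix usage can be bound to the same
 * tree. The helpers below hand out trees from the fixed pool in
 * mlxsw_sp->router.lpm_trees and reference-count them, so a tree is only
 * programmed to (and removed from) the device on first get and last put.
 */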
static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp, bool one_reserved)
{
        struct mlxsw_sp_lpm_tree *lpm_tree;
        int i;

        for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
                lpm_tree = &mlxsw_sp->router.lpm_trees[i];
                if (lpm_tree->ref_count == 0) {
                        if (one_reserved)
                                one_reserved = false;
                        else
                                return lpm_tree;
                }
        }
        return NULL;
}

static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
                                   struct mlxsw_sp_lpm_tree *lpm_tree)
{
        char ralta_pl[MLXSW_REG_RALTA_LEN];

        mlxsw_reg_ralta_pack(ralta_pl, true,
                             (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
                             lpm_tree->id);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}

static int mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
                                  struct mlxsw_sp_lpm_tree *lpm_tree)
{
        char ralta_pl[MLXSW_REG_RALTA_LEN];

        mlxsw_reg_ralta_pack(ralta_pl, false,
                             (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
                             lpm_tree->id);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}

static int
mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
                                  struct mlxsw_sp_prefix_usage *prefix_usage,
                                  struct mlxsw_sp_lpm_tree *lpm_tree)
{
        char ralst_pl[MLXSW_REG_RALST_LEN];
        u8 root_bin = 0;
        u8 prefix;
        u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;

        mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
                root_bin = prefix;

        mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
        mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
                if (prefix == 0)
                        continue;
                mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
                                         MLXSW_REG_RALST_BIN_NO_CHILD);
                last_prefix = prefix;
        }
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
}

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
                         struct mlxsw_sp_prefix_usage *prefix_usage,
                         enum mlxsw_sp_l3proto proto, bool one_reserved)
{
        struct mlxsw_sp_lpm_tree *lpm_tree;
        int err;

        lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp, one_reserved);
        if (!lpm_tree)
                return ERR_PTR(-EBUSY);
        lpm_tree->proto = proto;
        err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
        if (err)
                return ERR_PTR(err);

        err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
                                                lpm_tree);
        if (err)
                goto err_left_struct_set;
        memcpy(&lpm_tree->prefix_usage, prefix_usage,
               sizeof(lpm_tree->prefix_usage));
        return lpm_tree;

err_left_struct_set:
        mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
        return ERR_PTR(err);
}

static int mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
                                     struct mlxsw_sp_lpm_tree *lpm_tree)
{
        return mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
                      struct mlxsw_sp_prefix_usage *prefix_usage,
                      enum mlxsw_sp_l3proto proto, bool one_reserved)
{
        struct mlxsw_sp_lpm_tree *lpm_tree;
        int i;

        for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
                lpm_tree = &mlxsw_sp->router.lpm_trees[i];
                if (lpm_tree->ref_count != 0 &&
                    lpm_tree->proto == proto &&
                    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
                                             prefix_usage))
                        goto inc_ref_count;
        }
        lpm_tree = mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage,
                                            proto, one_reserved);
        if (IS_ERR(lpm_tree))
                return lpm_tree;

inc_ref_count:
        lpm_tree->ref_count++;
        return lpm_tree;
}

static int mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
                                 struct mlxsw_sp_lpm_tree *lpm_tree)
{
        if (--lpm_tree->ref_count == 0)
                return mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
        return 0;
}
static void mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
{
        struct mlxsw_sp_lpm_tree *lpm_tree;
        int i;

        for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
                lpm_tree = &mlxsw_sp->router.lpm_trees[i];
                lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
        }
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
{
        struct mlxsw_sp_vr *vr;
        int i;

        for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
                vr = &mlxsw_sp->router.vrs[i];
                if (!vr->used)
                        return vr;
        }
        return NULL;
}

static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
                                     struct mlxsw_sp_vr *vr)
{
        char raltb_pl[MLXSW_REG_RALTB_LEN];

        mlxsw_reg_raltb_pack(raltb_pl, vr->id,
                             (enum mlxsw_reg_ralxx_protocol) vr->proto,
                             vr->lpm_tree->id);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}

static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
                                       struct mlxsw_sp_vr *vr)
{
        char raltb_pl[MLXSW_REG_RALTB_LEN];

        /* Bind to tree 0 which is default */
        mlxsw_reg_raltb_pack(raltb_pl, vr->id,
                             (enum mlxsw_reg_ralxx_protocol) vr->proto, 0);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}

static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
{
        /* For our purpose, squash main and local table into one */
        if (tb_id == RT_TABLE_LOCAL)
                tb_id = RT_TABLE_MAIN;
        return tb_id;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
                                            u32 tb_id,
                                            enum mlxsw_sp_l3proto proto)
{
        struct mlxsw_sp_vr *vr;
        int i;

        tb_id = mlxsw_sp_fix_tb_id(tb_id);

        for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
                vr = &mlxsw_sp->router.vrs[i];
                if (vr->used && vr->proto == proto && vr->tb_id == tb_id)
                        return vr;
        }
        return NULL;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
                                              unsigned char prefix_len,
                                              u32 tb_id,
                                              enum mlxsw_sp_l3proto proto)
{
        struct mlxsw_sp_prefix_usage req_prefix_usage;
        struct mlxsw_sp_lpm_tree *lpm_tree;
        struct mlxsw_sp_vr *vr;
        int err;

        vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
        if (!vr)
                return ERR_PTR(-EBUSY);
        vr->fib = mlxsw_sp_fib_create();
        if (IS_ERR(vr->fib))
                return ERR_CAST(vr->fib);

        vr->proto = proto;
        vr->tb_id = tb_id;
        mlxsw_sp_prefix_usage_zero(&req_prefix_usage);
        mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
        lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
                                         proto, true);
        if (IS_ERR(lpm_tree)) {
                err = PTR_ERR(lpm_tree);
                goto err_tree_get;
        }
        vr->lpm_tree = lpm_tree;
        err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
        if (err)
                goto err_tree_bind;

        vr->used = true;
        return vr;

err_tree_bind:
        mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
err_tree_get:
        mlxsw_sp_fib_destroy(vr->fib);

        return ERR_PTR(err);
}
static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
                                struct mlxsw_sp_vr *vr)
{
        mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr);
        mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
        mlxsw_sp_fib_destroy(vr->fib);
        vr->used = false;
}
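
/* When the set of prefix lengths used by a virtual router changes, its LPM
 * tree may have to be replaced. The replacement below is done
 * make-before-break: the new tree is allocated and bound to the router
 * before the old one is released, so lookups never hit an unbound router.
 * For example, adding the first /24 route to a router whose tree only
 * describes /32 prefixes triggers such a rebind.
 */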
static int
mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
                           struct mlxsw_sp_prefix_usage *req_prefix_usage)
{
        struct mlxsw_sp_lpm_tree *lpm_tree = vr->lpm_tree;
        struct mlxsw_sp_lpm_tree *new_tree;
        int err;

        if (mlxsw_sp_prefix_usage_eq(req_prefix_usage, &lpm_tree->prefix_usage))
                return 0;

        new_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage,
                                         vr->proto, false);
        if (IS_ERR(new_tree)) {
                /* We failed to get a tree according to the required
                 * prefix usage. However, the current tree might still be
                 * good for us if our requirement is a subset of the
                 * prefixes used in the tree.
                 */
                if (mlxsw_sp_prefix_usage_subset(req_prefix_usage,
                                                 &lpm_tree->prefix_usage))
                        return 0;
                return PTR_ERR(new_tree);
        }

        /* Prevent packet loss by overwriting existing binding */
        vr->lpm_tree = new_tree;
        err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
        if (err)
                goto err_tree_bind;
        mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);

        return 0;

err_tree_bind:
        vr->lpm_tree = lpm_tree;
        mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
        return err;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp,
                                           unsigned char prefix_len,
                                           u32 tb_id,
                                           enum mlxsw_sp_l3proto proto)
{
        struct mlxsw_sp_vr *vr;
        int err;

        tb_id = mlxsw_sp_fix_tb_id(tb_id);
        vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id, proto);
        if (!vr) {
                vr = mlxsw_sp_vr_create(mlxsw_sp, prefix_len, tb_id, proto);
                if (IS_ERR(vr))
                        return vr;
        } else {
                struct mlxsw_sp_prefix_usage req_prefix_usage;

                mlxsw_sp_prefix_usage_cpy(&req_prefix_usage,
                                          &vr->fib->prefix_usage);
                mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
                /* Need to replace LPM tree in case new prefix is required. */
                err = mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
                                                 &req_prefix_usage);
                if (err)
                        return ERR_PTR(err);
        }
        return vr;
}

static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
{
        /* Destroy the virtual router entity in case the associated FIB is
         * empty, allowing it to be reused for other tables in the future.
         * Otherwise, check if some prefix usage disappeared and change the
         * tree if that is the case. Note that in case a new, smaller tree
         * cannot be allocated, the original one is kept in use.
         */
        if (mlxsw_sp_prefix_usage_none(&vr->fib->prefix_usage))
                mlxsw_sp_vr_destroy(mlxsw_sp, vr);
        else
                mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
                                           &vr->fib->prefix_usage);
}

static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
{
        struct mlxsw_sp_vr *vr;
        u64 max_vrs;
        int i;

        if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
                return -EIO;

        max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
        mlxsw_sp->router.vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
                                       GFP_KERNEL);
        if (!mlxsw_sp->router.vrs)
                return -ENOMEM;

        for (i = 0; i < max_vrs; i++) {
                vr = &mlxsw_sp->router.vrs[i];
                vr->id = i;
        }

        return 0;
}

static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);

static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
{
        /* At this stage we're guaranteed not to have new incoming
         * FIB notifications and the work queue is free from FIBs
         * sitting on top of mlxsw netdevs. However, we can still
         * have other FIBs queued. Flush the queue before flushing
         * the device's tables. No need for locks, as we're the only
         * writer.
         */
        mlxsw_core_flush_owq();
        mlxsw_sp_router_fib_flush(mlxsw_sp);
        kfree(mlxsw_sp->router.vrs);
}

struct mlxsw_sp_neigh_key {
        struct neighbour *n;
};

struct mlxsw_sp_neigh_entry {
        struct list_head rif_list_node;
        struct rhash_head ht_node;
        struct mlxsw_sp_neigh_key key;
        u16 rif;
        bool connected;
        unsigned char ha[ETH_ALEN];
        struct list_head nexthop_list; /* list of nexthops using
                                        * this neigh entry
                                        */
        struct list_head nexthop_neighs_list_node;
};

static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
        .key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
        .head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
        .key_len = sizeof(struct mlxsw_sp_neigh_key),
};

static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
                           u16 rif)
{
        struct mlxsw_sp_neigh_entry *neigh_entry;

        neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
        if (!neigh_entry)
                return NULL;

        neigh_entry->key.n = n;
        neigh_entry->rif = rif;
        INIT_LIST_HEAD(&neigh_entry->nexthop_list);

        return neigh_entry;
}

static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
{
        kfree(neigh_entry);
}

static int
mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
                            struct mlxsw_sp_neigh_entry *neigh_entry)
{
        return rhashtable_insert_fast(&mlxsw_sp->router.neigh_ht,
                                      &neigh_entry->ht_node,
                                      mlxsw_sp_neigh_ht_params);
}

static void
mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
                            struct mlxsw_sp_neigh_entry *neigh_entry)
{
        rhashtable_remove_fast(&mlxsw_sp->router.neigh_ht,
                               &neigh_entry->ht_node,
                               mlxsw_sp_neigh_ht_params);
}

static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
{
        struct mlxsw_sp_neigh_entry *neigh_entry;
        struct mlxsw_sp_rif *r;
        int err;

        r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
        if (!r)
                return ERR_PTR(-EINVAL);

        neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, r->rif);
        if (!neigh_entry)
                return ERR_PTR(-ENOMEM);

        err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
        if (err)
                goto err_neigh_entry_insert;

        list_add(&neigh_entry->rif_list_node, &r->neigh_list);

        return neigh_entry;

err_neigh_entry_insert:
        mlxsw_sp_neigh_entry_free(neigh_entry);
        return ERR_PTR(err);
}

static void
mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
                             struct mlxsw_sp_neigh_entry *neigh_entry)
{
        list_del(&neigh_entry->rif_list_node);
        mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
        mlxsw_sp_neigh_entry_free(neigh_entry);
}

static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
{
        struct mlxsw_sp_neigh_key key;

        key.n = n;
        return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht,
                                      &key, mlxsw_sp_neigh_ht_params);
}
static void
mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
{
        unsigned long interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);

        mlxsw_sp->router.neighs_update.interval = jiffies_to_msecs(interval);
}

static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
                                                   char *rauhtd_pl,
                                                   int ent_index)
{
        struct net_device *dev;
        struct neighbour *n;
        __be32 dipn;
        u32 dip;
        u16 rif;

        mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);

        if (!mlxsw_sp->rifs[rif]) {
                dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
                return;
        }

        dipn = htonl(dip);
        dev = mlxsw_sp->rifs[rif]->dev;
        n = neigh_lookup(&arp_tbl, &dipn, dev);
        if (!n) {
                netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n",
                           &dip);
                return;
        }

        netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
        neigh_event_send(n, NULL);
        neigh_release(n);
}

static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
                                                   char *rauhtd_pl,
                                                   int rec_index)
{
        u8 num_entries;
        int i;

        num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
                                                                rec_index);
        /* Hardware starts counting at 0, so add 1. */
        num_entries++;

        /* Each record consists of several neighbour entries. */
        for (i = 0; i < num_entries; i++) {
                int ent_index;

                ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
                mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
                                                       ent_index);
        }
}
static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
                                              char *rauhtd_pl, int rec_index)
{
        switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
        case MLXSW_REG_RAUHTD_TYPE_IPV4:
                mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
                                                       rec_index);
                break;
        case MLXSW_REG_RAUHTD_TYPE_IPV6:
                WARN_ON_ONCE(1);
                break;
        }
}
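
/* Determine whether the last RAUHTD response used the entire buffer. If it
 * did, the activity dump may have been truncated and the caller below must
 * issue another query to retrieve the remaining records. The buffer is
 * considered full when it holds the maximum number of records and the last
 * record itself carries the maximum number of entries (an IPv6 record in
 * the last slot is treated as full as well).
 */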
static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
{
        u8 num_rec, last_rec_index, num_entries;

        num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
        last_rec_index = num_rec - 1;

        if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
                return false;
        if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
            MLXSW_REG_RAUHTD_TYPE_IPV6)
                return true;

        num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
                                                                last_rec_index);
        if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
                return true;
        return false;
}

static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
{
        char *rauhtd_pl;
        u8 num_rec;
        int i, err;

        rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
        if (!rauhtd_pl)
                return -ENOMEM;

        /* Make sure the neighbour's netdev isn't removed in the
         * process.
         */
        rtnl_lock();
        do {
                mlxsw_reg_rauhtd_pack(rauhtd_pl, MLXSW_REG_RAUHTD_TYPE_IPV4);
                err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
                                      rauhtd_pl);
                if (err) {
                        dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
                        break;
                }
                num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
                for (i = 0; i < num_rec; i++)
                        mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
                                                          i);
        } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
        rtnl_unlock();

        kfree(rauhtd_pl);
        return err;
}

static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
{
        struct mlxsw_sp_neigh_entry *neigh_entry;

        /* Take RTNL mutex here to prevent the lists from changing. */
        rtnl_lock();
        list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
                            nexthop_neighs_list_node)
                /* If this neigh has nexthops, make the kernel think it
                 * is active regardless of the traffic.
                 */
                neigh_event_send(neigh_entry->key.n, NULL);
        rtnl_unlock();
}

static void
mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
{
        unsigned long interval = mlxsw_sp->router.neighs_update.interval;

        mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw,
                               msecs_to_jiffies(interval));
}

static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
{
        struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
                                                 router.neighs_update.dw.work);
        int err;

        err = mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp);
        if (err)
                dev_err(mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity\n");

        mlxsw_sp_router_neighs_update_nh(mlxsw_sp);

        mlxsw_sp_router_neighs_update_work_schedule(mlxsw_sp);
}
static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
{
        struct mlxsw_sp_neigh_entry *neigh_entry;
        struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
                                                 router.nexthop_probe_dw.work);

        /* Iterate over nexthop neighbours, find those that are unresolved
         * and send ARP on them. This solves the chicken-and-egg problem in
         * which a nexthop wouldn't be offloaded until its neighbour is
         * resolved, but the neighbour would never be resolved if traffic
         * is flowing in hardware using a different nexthop.
         *
         * Take RTNL mutex here to prevent the lists from changing.
         */
        rtnl_lock();
        list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
                            nexthop_neighs_list_node)
                if (!neigh_entry->connected)
                        neigh_event_send(neigh_entry->key.n, NULL);
        rtnl_unlock();

        mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw,
                               MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
}

static void
mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
                              struct mlxsw_sp_neigh_entry *neigh_entry,
                              bool removing);

static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
{
        return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
                        MLXSW_REG_RAUHT_OP_WRITE_DELETE;
}

static void
mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
                                struct mlxsw_sp_neigh_entry *neigh_entry,
                                enum mlxsw_reg_rauht_op op)
{
        struct neighbour *n = neigh_entry->key.n;
        u32 dip = ntohl(*((__be32 *) n->primary_key));
        char rauht_pl[MLXSW_REG_RAUHT_LEN];

        mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
                              dip);
        mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
}

static void
mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
                            struct mlxsw_sp_neigh_entry *neigh_entry,
                            bool adding)
{
        if (!adding && !neigh_entry->connected)
                return;
        neigh_entry->connected = adding;
        if (neigh_entry->key.n->tbl == &arp_tbl)
                mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
                                                mlxsw_sp_rauht_op(adding));
        else
                WARN_ON_ONCE(1);
}
struct mlxsw_sp_neigh_event_work {
        struct work_struct work;
        struct mlxsw_sp *mlxsw_sp;
        struct neighbour *n;
};

static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
{
        struct mlxsw_sp_neigh_event_work *neigh_work =
                container_of(work, struct mlxsw_sp_neigh_event_work, work);
        struct mlxsw_sp *mlxsw_sp = neigh_work->mlxsw_sp;
        struct mlxsw_sp_neigh_entry *neigh_entry;
        struct neighbour *n = neigh_work->n;
        unsigned char ha[ETH_ALEN];
        bool entry_connected;
        u8 nud_state, dead;

        /* If these parameters are changed after we release the lock,
         * then we are guaranteed to receive another event letting us
         * know about it.
         */
        read_lock_bh(&n->lock);
        memcpy(ha, n->ha, ETH_ALEN);
        nud_state = n->nud_state;
        dead = n->dead;
        read_unlock_bh(&n->lock);

        rtnl_lock();
        entry_connected = nud_state & NUD_VALID && !dead;
        neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
        if (!entry_connected && !neigh_entry)
                goto out;
        if (!neigh_entry) {
                neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
                if (IS_ERR(neigh_entry))
                        goto out;
        }

        memcpy(neigh_entry->ha, ha, ETH_ALEN);
        mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
        mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);

        if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
                mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);

out:
        rtnl_unlock();
        neigh_release(n);
        kfree(neigh_work);
}

int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
                                   unsigned long event, void *ptr)
{
        struct mlxsw_sp_neigh_event_work *neigh_work;
        struct mlxsw_sp_port *mlxsw_sp_port;
        struct mlxsw_sp *mlxsw_sp;
        unsigned long interval;
        struct neigh_parms *p;
        struct neighbour *n;

        switch (event) {
        case NETEVENT_DELAY_PROBE_TIME_UPDATE:
                p = ptr;

                /* We don't care about changes in the default table. */
                if (!p->dev || p->tbl != &arp_tbl)
                        return NOTIFY_DONE;

                /* We are in atomic context and can't take RTNL mutex,
                 * so use RCU variant to walk the device chain.
                 */
                mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
                if (!mlxsw_sp_port)
                        return NOTIFY_DONE;

                mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
                interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
                mlxsw_sp->router.neighs_update.interval = interval;

                mlxsw_sp_port_dev_put(mlxsw_sp_port);
                break;
        case NETEVENT_NEIGH_UPDATE:
                n = ptr;

                if (n->tbl != &arp_tbl)
                        return NOTIFY_DONE;

                mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
                if (!mlxsw_sp_port)
                        return NOTIFY_DONE;

                neigh_work = kzalloc(sizeof(*neigh_work), GFP_ATOMIC);
                if (!neigh_work) {
                        mlxsw_sp_port_dev_put(mlxsw_sp_port);
                        return NOTIFY_BAD;
                }

                INIT_WORK(&neigh_work->work, mlxsw_sp_router_neigh_event_work);
                neigh_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
                neigh_work->n = n;

                /* Take a reference to ensure the neighbour won't be
                 * destroyed until we drop the reference in the delayed
                 * work.
                 */
                neigh_clone(n);
                mlxsw_core_schedule_work(&neigh_work->work);
                mlxsw_sp_port_dev_put(mlxsw_sp_port);
                break;
        }

        return NOTIFY_DONE;
}
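
/* Periodic neighbour machinery: the neighs_update work dumps hardware
 * activity via RAUHTD and feeds it back to the kernel so that actively
 * used neighbours are not aged out, while the nexthop_probe work sends
 * probes for unresolved nexthop neighbours. Both delayed works are
 * created in mlxsw_sp_neigh_init() below and reschedule themselves from
 * their own handlers.
 */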
static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
{
        int err;

        err = rhashtable_init(&mlxsw_sp->router.neigh_ht,
                              &mlxsw_sp_neigh_ht_params);
        if (err)
                return err;

        /* Initialize the polling interval according to the default
         * table.
         */
        mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);

        /* Create the delayed works for the activity update. */
        INIT_DELAYED_WORK(&mlxsw_sp->router.neighs_update.dw,
                          mlxsw_sp_router_neighs_update_work);
        INIT_DELAYED_WORK(&mlxsw_sp->router.nexthop_probe_dw,
                          mlxsw_sp_router_probe_unresolved_nexthops);
        mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, 0);
        mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, 0);
        return 0;
}

static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
{
        cancel_delayed_work_sync(&mlxsw_sp->router.neighs_update.dw);
        cancel_delayed_work_sync(&mlxsw_sp->router.nexthop_probe_dw);
        rhashtable_destroy(&mlxsw_sp->router.neigh_ht);
}

static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp,
                                    const struct mlxsw_sp_rif *r)
{
        char rauht_pl[MLXSW_REG_RAUHT_LEN];

        mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL,
                             r->rif, r->addr);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
}

static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
                                         struct mlxsw_sp_rif *r)
{
        struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;

        mlxsw_sp_neigh_rif_flush(mlxsw_sp, r);
        list_for_each_entry_safe(neigh_entry, tmp, &r->neigh_list,
                                 rif_list_node)
                mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
}

struct mlxsw_sp_nexthop_key {
        struct fib_nh *fib_nh;
};

struct mlxsw_sp_nexthop {
        struct list_head neigh_list_node; /* member of neigh entry list */
        struct list_head rif_list_node;
        struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
                                                * this nexthop belongs to
                                                */
        struct rhash_head ht_node;
        struct mlxsw_sp_nexthop_key key;
        struct mlxsw_sp_rif *r;
        u8 should_offload:1, /* set indicates that this neigh is connected
                              * and should be put into the KVD linear area
                              * of this group.
                              */
           offloaded:1, /* set in case the neigh is actually put into
                         * the KVD linear area of this group.
                         */
           update:1; /* set indicates that the MAC of this neigh should
                      * be updated in HW
                      */
        struct mlxsw_sp_neigh_entry *neigh_entry;
};

struct mlxsw_sp_nexthop_group_key {
        struct fib_info *fi;
};

struct mlxsw_sp_nexthop_group {
        struct rhash_head ht_node;
        struct list_head fib_list; /* list of fib entries that use this group */
        struct mlxsw_sp_nexthop_group_key key;
        u8 adj_index_valid:1,
           gateway:1; /* routes using the group use a gateway */
        u32 adj_index;
        u16 ecmp_size;
        u16 count;
        struct mlxsw_sp_nexthop nexthops[0];
#define nh_rif nexthops[0].r
};

static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
        .key_offset = offsetof(struct mlxsw_sp_nexthop_group, key),
        .head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
        .key_len = sizeof(struct mlxsw_sp_nexthop_group_key),
};

static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
                                         struct mlxsw_sp_nexthop_group *nh_grp)
{
        return rhashtable_insert_fast(&mlxsw_sp->router.nexthop_group_ht,
                                      &nh_grp->ht_node,
                                      mlxsw_sp_nexthop_group_ht_params);
}

static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
                                          struct mlxsw_sp_nexthop_group *nh_grp)
{
        rhashtable_remove_fast(&mlxsw_sp->router.nexthop_group_ht,
                               &nh_grp->ht_node,
                               mlxsw_sp_nexthop_group_ht_params);
}

static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop_group_lookup(struct mlxsw_sp *mlxsw_sp,
                              struct mlxsw_sp_nexthop_group_key key)
{
        return rhashtable_lookup_fast(&mlxsw_sp->router.nexthop_group_ht, &key,
                                      mlxsw_sp_nexthop_group_ht_params);
}

static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
        .key_offset = offsetof(struct mlxsw_sp_nexthop, key),
        .head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
        .key_len = sizeof(struct mlxsw_sp_nexthop_key),
};

static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
                                   struct mlxsw_sp_nexthop *nh)
{
        return rhashtable_insert_fast(&mlxsw_sp->router.nexthop_ht,
                                      &nh->ht_node, mlxsw_sp_nexthop_ht_params);
}

static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
                                    struct mlxsw_sp_nexthop *nh)
{
        rhashtable_remove_fast(&mlxsw_sp->router.nexthop_ht, &nh->ht_node,
                               mlxsw_sp_nexthop_ht_params);
}

static struct mlxsw_sp_nexthop *
mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
                        struct mlxsw_sp_nexthop_key key)
{
        return rhashtable_lookup_fast(&mlxsw_sp->router.nexthop_ht, &key,
                                      mlxsw_sp_nexthop_ht_params);
}
static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
                                             struct mlxsw_sp_vr *vr,
                                             u32 adj_index, u16 ecmp_size,
                                             u32 new_adj_index,
                                             u16 new_ecmp_size)
{
        char raleu_pl[MLXSW_REG_RALEU_LEN];

        mlxsw_reg_raleu_pack(raleu_pl,
                             (enum mlxsw_reg_ralxx_protocol) vr->proto, vr->id,
                             adj_index, ecmp_size, new_adj_index,
                             new_ecmp_size);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
}

static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
                                          struct mlxsw_sp_nexthop_group *nh_grp,
                                          u32 old_adj_index, u16 old_ecmp_size)
{
        struct mlxsw_sp_fib_entry *fib_entry;
        struct mlxsw_sp_vr *vr = NULL;
        int err;

        list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
                if (vr == fib_entry->fib_node->vr)
                        continue;
                vr = fib_entry->fib_node->vr;
                err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr,
                                                        old_adj_index,
                                                        old_ecmp_size,
                                                        nh_grp->adj_index,
                                                        nh_grp->ecmp_size);
                if (err)
                        return err;
        }
        return 0;
}

static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
                                       struct mlxsw_sp_nexthop *nh)
{
        struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
        char ratr_pl[MLXSW_REG_RATR_LEN];

        mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
                            true, adj_index, neigh_entry->rif);
        mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
}

static int
mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp,
                                  struct mlxsw_sp_nexthop_group *nh_grp,
                                  bool reallocate)
{
        u32 adj_index = nh_grp->adj_index; /* base */
        struct mlxsw_sp_nexthop *nh;
        int i;
        int err;

        for (i = 0; i < nh_grp->count; i++) {
                nh = &nh_grp->nexthops[i];

                if (!nh->should_offload) {
                        nh->offloaded = 0;
                        continue;
                }

                if (nh->update || reallocate) {
                        err = mlxsw_sp_nexthop_mac_update(mlxsw_sp,
                                                          adj_index, nh);
                        if (err)
                                return err;
                        nh->update = 0;
                        nh->offloaded = 1;
                }
                adj_index++;
        }
        return 0;
}

static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
                                     struct mlxsw_sp_fib_entry *fib_entry);

static int
mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
                                    struct mlxsw_sp_nexthop_group *nh_grp)
{
        struct mlxsw_sp_fib_entry *fib_entry;
        int err;

        list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
                err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
                if (err)
                        return err;
        }
        return 0;
}
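
/* Refresh a nexthop group after one of its members changed. Roughly: count
 * the nexthops that should be offloaded; if the set changed, allocate a new
 * KVD linear area of that size, write the adjacency entries, and then
 * either update the using FIB entries (when the group had no valid
 * adjacency index yet) or mass-update the old index to the new one. On any
 * failure, fall back to trapping the traffic to the CPU.
 */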
static void
mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
                               struct mlxsw_sp_nexthop_group *nh_grp)
{
        struct mlxsw_sp_nexthop *nh;
        bool offload_change = false;
        u32 adj_index;
        u16 ecmp_size = 0;
        bool old_adj_index_valid;
        u32 old_adj_index;
        u16 old_ecmp_size;
        int ret;
        int i;
        int err;

        if (!nh_grp->gateway) {
                mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
                return;
        }

        for (i = 0; i < nh_grp->count; i++) {
                nh = &nh_grp->nexthops[i];

                if (nh->should_offload ^ nh->offloaded) {
                        offload_change = true;
                        if (nh->should_offload)
                                nh->update = 1;
                }
                if (nh->should_offload)
                        ecmp_size++;
        }
        if (!offload_change) {
                /* Nothing was added or removed, so no need to reallocate. Just
                 * update MAC on existing adjacency indexes.
                 */
                err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp,
                                                        false);
                if (err) {
                        dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
                        goto set_trap;
                }
                return;
        }
        if (!ecmp_size)
                /* No neigh of this group is connected so we just set
                 * the trap and let everything flow through kernel.
                 */
                goto set_trap;

        ret = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size);
        if (ret < 0) {
                /* We ran out of KVD linear space, just set the
                 * trap and let everything flow through kernel.
                 */
                dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
                goto set_trap;
        }
        adj_index = ret;
        old_adj_index_valid = nh_grp->adj_index_valid;
        old_adj_index = nh_grp->adj_index;
        old_ecmp_size = nh_grp->ecmp_size;
        nh_grp->adj_index_valid = 1;
        nh_grp->adj_index = adj_index;
        nh_grp->ecmp_size = ecmp_size;
        err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp, true);
        if (err) {
                dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
                goto set_trap;
        }

        if (!old_adj_index_valid) {
                /* The trap was set for fib entries, so we have to call
                 * fib entry update to unset it and use adjacency index.
                 */
                err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
                if (err) {
                        dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
                        goto set_trap;
                }
                return;
        }

        err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
                                             old_adj_index, old_ecmp_size);
        mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
        if (err) {
                dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
                goto set_trap;
        }
        return;

set_trap:
        old_adj_index_valid = nh_grp->adj_index_valid;
        nh_grp->adj_index_valid = 0;
        for (i = 0; i < nh_grp->count; i++) {
                nh = &nh_grp->nexthops[i];
                nh->offloaded = 0;
        }
        err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
        if (err)
                dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
        if (old_adj_index_valid)
                mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
}

static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
                                            bool removing)
{
        if (!removing && !nh->should_offload)
                nh->should_offload = 1;
        else if (removing && nh->offloaded)
                nh->should_offload = 0;
        nh->update = 1;
}

static void
mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
                              struct mlxsw_sp_neigh_entry *neigh_entry,
                              bool removing)
{
        struct mlxsw_sp_nexthop *nh;

        list_for_each_entry(nh, &neigh_entry->nexthop_list,
                            neigh_list_node) {
                __mlxsw_sp_nexthop_neigh_update(nh, removing);
                mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
        }
}

static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
                                      struct mlxsw_sp_rif *r)
{
        if (nh->r)
                return;

        nh->r = r;
        list_add(&nh->rif_list_node, &r->nexthop_list);
}

static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
{
        if (!nh->r)
                return;

        list_del(&nh->rif_list_node);
        nh->r = NULL;
}
static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
                                       struct mlxsw_sp_nexthop *nh)
{
        struct mlxsw_sp_neigh_entry *neigh_entry;
        struct fib_nh *fib_nh = nh->key.fib_nh;
        struct neighbour *n;
        u8 nud_state, dead;
        int err;

        if (!nh->nh_grp->gateway || nh->neigh_entry)
                return 0;

        /* Take a reference on the neighbour, ensuring it is not
         * destroyed before the nexthop entry is finished with it.
         * The reference is taken either in neigh_lookup() or
         * in neigh_create() in case n is not found.
         */
        n = neigh_lookup(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev);
        if (!n) {
                n = neigh_create(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev);
                if (IS_ERR(n))
                        return PTR_ERR(n);
                neigh_event_send(n, NULL);
        }
        neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
        if (!neigh_entry) {
                neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
                if (IS_ERR(neigh_entry)) {
                        err = -EINVAL;
                        goto err_neigh_entry_create;
                }
        }

        /* If that is the first nexthop connected to that neigh, add to
         * nexthop_neighs_list
         */
        if (list_empty(&neigh_entry->nexthop_list))
                list_add_tail(&neigh_entry->nexthop_neighs_list_node,
                              &mlxsw_sp->router.nexthop_neighs_list);

        nh->neigh_entry = neigh_entry;
        list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
        read_lock_bh(&n->lock);
        nud_state = n->nud_state;
        dead = n->dead;
        read_unlock_bh(&n->lock);
        __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));

        return 0;

err_neigh_entry_create:
        neigh_release(n);
        return err;
}

static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
                                        struct mlxsw_sp_nexthop *nh)
{
        struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
        struct neighbour *n;

        if (!neigh_entry)
                return;
        n = neigh_entry->key.n;

        __mlxsw_sp_nexthop_neigh_update(nh, true);
        list_del(&nh->neigh_list_node);
        nh->neigh_entry = NULL;

        /* If that is the last nexthop connected to that neigh, remove from
         * nexthop_neighs_list
         */
        if (list_empty(&neigh_entry->nexthop_list))
                list_del(&neigh_entry->nexthop_neighs_list_node);

        if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
                mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);

        neigh_release(n);
}
static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
                                 struct mlxsw_sp_nexthop_group *nh_grp,
                                 struct mlxsw_sp_nexthop *nh,
                                 struct fib_nh *fib_nh)
{
        struct net_device *dev = fib_nh->nh_dev;
        struct in_device *in_dev;
        struct mlxsw_sp_rif *r;
        int err;

        nh->nh_grp = nh_grp;
        nh->key.fib_nh = fib_nh;
        err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
        if (err)
                return err;

        in_dev = __in_dev_get_rtnl(dev);
        if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
            fib_nh->nh_flags & RTNH_F_LINKDOWN)
                return 0;

        r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
        if (!r)
                return 0;
        mlxsw_sp_nexthop_rif_init(nh, r);

        err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
        if (err)
                goto err_nexthop_neigh_init;

        return 0;

err_nexthop_neigh_init:
        mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
        return err;
}

static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp,
                                  struct mlxsw_sp_nexthop *nh)
{
        mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
        mlxsw_sp_nexthop_rif_fini(nh);
        mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
}

static void mlxsw_sp_nexthop_event(struct mlxsw_sp *mlxsw_sp,
                                   unsigned long event, struct fib_nh *fib_nh)
{
        struct mlxsw_sp_nexthop_key key;
        struct mlxsw_sp_nexthop *nh;
        struct mlxsw_sp_rif *r;

        if (mlxsw_sp->router.aborted)
                return;

        key.fib_nh = fib_nh;
        nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
        if (WARN_ON_ONCE(!nh))
                return;

        r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, fib_nh->nh_dev);
        if (!r)
                return;

        switch (event) {
        case FIB_EVENT_NH_ADD:
                mlxsw_sp_nexthop_rif_init(nh, r);
                mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
                break;
        case FIB_EVENT_NH_DEL:
                mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
                mlxsw_sp_nexthop_rif_fini(nh);
                break;
        }

        mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
}

static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
                                           struct mlxsw_sp_rif *r)
{
        struct mlxsw_sp_nexthop *nh, *tmp;

        list_for_each_entry_safe(nh, tmp, &r->nexthop_list, rif_list_node) {
                mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
                mlxsw_sp_nexthop_rif_fini(nh);
                mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
        }
}

static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
{
        struct mlxsw_sp_nexthop_group *nh_grp;
        struct mlxsw_sp_nexthop *nh;
        struct fib_nh *fib_nh;
        size_t alloc_size;
        int i;
        int err;

        alloc_size = sizeof(*nh_grp) +
                     fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
        nh_grp = kzalloc(alloc_size, GFP_KERNEL);
        if (!nh_grp)
                return ERR_PTR(-ENOMEM);
        INIT_LIST_HEAD(&nh_grp->fib_list);
        nh_grp->gateway = fi->fib_nh->nh_scope == RT_SCOPE_LINK;
        nh_grp->count = fi->fib_nhs;
        nh_grp->key.fi = fi;
        for (i = 0; i < nh_grp->count; i++) {
                nh = &nh_grp->nexthops[i];
                fib_nh = &fi->fib_nh[i];
                err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh);
                if (err)
                        goto err_nexthop_init;
        }
        err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
        if (err)
                goto err_nexthop_group_insert;
        mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
        return nh_grp;

err_nexthop_group_insert:
err_nexthop_init:
        for (i--; i >= 0; i--) {
                nh = &nh_grp->nexthops[i];
                mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
        }
        kfree(nh_grp);
        return ERR_PTR(err);
}

static void
mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp,
                               struct mlxsw_sp_nexthop_group *nh_grp)
{
        struct mlxsw_sp_nexthop *nh;
        int i;

        mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
        for (i = 0; i < nh_grp->count; i++) {
                nh = &nh_grp->nexthops[i];
                mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
        }
        mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
        WARN_ON_ONCE(nh_grp->adj_index_valid);
        kfree(nh_grp);
}
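
/* Nexthop groups are keyed by the kernel's struct fib_info, so FIB entries
 * created from routes sharing the same fib_info also share one group (and
 * thus one adjacency area). The get/put pair below manages this sharing;
 * the group is destroyed when its fib_list empties.
 */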
static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp,
                                      struct mlxsw_sp_fib_entry *fib_entry,
                                      struct fib_info *fi)
{
        struct mlxsw_sp_nexthop_group_key key;
        struct mlxsw_sp_nexthop_group *nh_grp;

        key.fi = fi;
        nh_grp = mlxsw_sp_nexthop_group_lookup(mlxsw_sp, key);
        if (!nh_grp) {
                nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi);
                if (IS_ERR(nh_grp))
                        return PTR_ERR(nh_grp);
        }
        list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
        fib_entry->nh_group = nh_grp;
        return 0;
}

static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp,
                                       struct mlxsw_sp_fib_entry *fib_entry)
{
        struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;

        list_del(&fib_entry->nexthop_group_node);
        if (!list_empty(&nh_grp->fib_list))
                return;
        mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp);
}

static bool
mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
{
        struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;

        if (fib_entry->params.tos)
                return false;

        switch (fib_entry->type) {
        case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
                return !!nh_group->adj_index_valid;
        case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
                return !!nh_group->nh_rif;
        default:
                return false;
        }
}

static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
{
        fib_entry->offloaded = true;

        switch (fib_entry->fib_node->vr->proto) {
        case MLXSW_SP_L3_PROTO_IPV4:
                fib_info_offload_inc(fib_entry->nh_group->key.fi);
                break;
        case MLXSW_SP_L3_PROTO_IPV6:
                WARN_ON_ONCE(1);
        }
}

static void
mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
{
        switch (fib_entry->fib_node->vr->proto) {
        case MLXSW_SP_L3_PROTO_IPV4:
                fib_info_offload_dec(fib_entry->nh_group->key.fi);
                break;
        case MLXSW_SP_L3_PROTO_IPV6:
                WARN_ON_ONCE(1);
        }

        fib_entry->offloaded = false;
}

static void
mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
                                   enum mlxsw_reg_ralue_op op, int err)
{
        switch (op) {
        case MLXSW_REG_RALUE_OP_WRITE_DELETE:
                if (!fib_entry->offloaded)
                        return;
                return mlxsw_sp_fib_entry_offload_unset(fib_entry);
        case MLXSW_REG_RALUE_OP_WRITE_WRITE:
                if (err)
                        return;
                if (mlxsw_sp_fib_entry_should_offload(fib_entry) &&
                    !fib_entry->offloaded)
                        mlxsw_sp_fib_entry_offload_set(fib_entry);
                else if (!mlxsw_sp_fib_entry_should_offload(fib_entry) &&
                         fib_entry->offloaded)
                        mlxsw_sp_fib_entry_offload_unset(fib_entry);
                return;
        default:
                return;
        }
}
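
/* A FIB entry is written to the device with one of three RALUE actions,
 * matching mlxsw_sp_fib_entry_type: remote (forward using an adjacency
 * index), local (forward to a router interface) or ip2me (trap to the
 * CPU). The remote and local variants degrade to a trap action whenever
 * the entry cannot currently be offloaded.
 */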
static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp,
                                         struct mlxsw_sp_fib_entry *fib_entry,
                                         enum mlxsw_reg_ralue_op op)
{
        char ralue_pl[MLXSW_REG_RALUE_LEN];
        u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
        struct mlxsw_sp_vr *vr = fib_entry->fib_node->vr;
        enum mlxsw_reg_ralue_trap_action trap_action;
        u16 trap_id = 0;
        u32 adjacency_index = 0;
        u16 ecmp_size = 0;

        /* In case the nexthop group adjacency index is valid, use it
         * with the provided ECMP size. Otherwise, set up a trap and pass
         * traffic to the kernel.
         */
        if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
                trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
                adjacency_index = fib_entry->nh_group->adj_index;
                ecmp_size = fib_entry->nh_group->ecmp_size;
        } else {
                trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
                trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
        }

        mlxsw_reg_ralue_pack4(ralue_pl,
                              (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
                              vr->id, fib_entry->fib_node->key.prefix_len,
                              *p_dip);
        mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
                                        adjacency_index, ecmp_size);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}

static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp,
                                        struct mlxsw_sp_fib_entry *fib_entry,
                                        enum mlxsw_reg_ralue_op op)
{
        struct mlxsw_sp_rif *r = fib_entry->nh_group->nh_rif;
        enum mlxsw_reg_ralue_trap_action trap_action;
        char ralue_pl[MLXSW_REG_RALUE_LEN];
        u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
        struct mlxsw_sp_vr *vr = fib_entry->fib_node->vr;
        u16 trap_id = 0;
        u16 rif = 0;

        if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
                trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
                rif = r->rif;
        } else {
                trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
                trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
        }

        mlxsw_reg_ralue_pack4(ralue_pl,
                              (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
                              vr->id, fib_entry->fib_node->key.prefix_len,
                              *p_dip);
        mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, rif);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}

static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp,
                                       struct mlxsw_sp_fib_entry *fib_entry,
                                       enum mlxsw_reg_ralue_op op)
{
        char ralue_pl[MLXSW_REG_RALUE_LEN];
        u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
        struct mlxsw_sp_vr *vr = fib_entry->fib_node->vr;

        mlxsw_reg_ralue_pack4(ralue_pl,
                              (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
                              vr->id, fib_entry->fib_node->key.prefix_len,
                              *p_dip);
        mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}

static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp,
                                  struct mlxsw_sp_fib_entry *fib_entry,
                                  enum mlxsw_reg_ralue_op op)
{
        switch (fib_entry->type) {
        case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
                return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op);
        case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
                return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op);
        case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
                return mlxsw_sp_fib_entry_op4_trap(mlxsw_sp, fib_entry, op);
        }
        return -EINVAL;
}

static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
                                 struct mlxsw_sp_fib_entry *fib_entry,
                                 enum mlxsw_reg_ralue_op op)
{
        int err = -EINVAL;

        switch (fib_entry->fib_node->vr->proto) {
        case MLXSW_SP_L3_PROTO_IPV4:
                err = mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op);
                break;
        case MLXSW_SP_L3_PROTO_IPV6:
                return err;
        }
        mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
        return err;
}

static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
                                     struct mlxsw_sp_fib_entry *fib_entry)
{
        return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
                                     MLXSW_REG_RALUE_OP_WRITE_WRITE);
}
static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
                                  struct mlxsw_sp_fib_entry *fib_entry)
{
        return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
                                     MLXSW_REG_RALUE_OP_WRITE_DELETE);
}

static int
mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
                             const struct fib_entry_notifier_info *fen_info,
                             struct mlxsw_sp_fib_entry *fib_entry)
{
        struct fib_info *fi = fen_info->fi;

        if (fen_info->type == RTN_LOCAL || fen_info->type == RTN_BROADCAST) {
                fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
                return 0;
        }
        if (fen_info->type != RTN_UNICAST)
                return -EINVAL;
        if (fi->fib_nh->nh_scope != RT_SCOPE_LINK)
                fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
        else
                fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
        return 0;
}

static struct mlxsw_sp_fib_entry *
mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
                           struct mlxsw_sp_fib_node *fib_node,
                           const struct fib_entry_notifier_info *fen_info)
{
        struct mlxsw_sp_fib_entry *fib_entry;
        int err;

        fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL);
        if (!fib_entry) {
                err = -ENOMEM;
                goto err_fib_entry_alloc;
        }

        err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
        if (err)
                goto err_fib4_entry_type_set;

        err = mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fen_info->fi);
        if (err)
                goto err_nexthop_group_get;

        fib_entry->params.prio = fen_info->fi->fib_priority;
        fib_entry->params.tb_id = fen_info->tb_id;
        fib_entry->params.type = fen_info->type;
        fib_entry->params.tos = fen_info->tos;

        fib_entry->fib_node = fib_node;

        return fib_entry;

err_nexthop_group_get:
err_fib4_entry_type_set:
        kfree(fib_entry);
err_fib_entry_alloc:
        return ERR_PTR(err);
}

static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
                                        struct mlxsw_sp_fib_entry *fib_entry)
{
        mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry);
        kfree(fib_entry);
}

static struct mlxsw_sp_fib_node *
mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp,
                       const struct fib_entry_notifier_info *fen_info);

static struct mlxsw_sp_fib_entry *
mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
                           const struct fib_entry_notifier_info *fen_info)
{
        struct mlxsw_sp_fib_entry *fib_entry;
        struct mlxsw_sp_fib_node *fib_node;

        fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info);
        if (IS_ERR(fib_node))
                return NULL;

        list_for_each_entry(fib_entry, &fib_node->entry_list, list) {
                if (fib_entry->params.tb_id == fen_info->tb_id &&
                    fib_entry->params.tos == fen_info->tos &&
                    fib_entry->params.type == fen_info->type &&
                    fib_entry->nh_group->key.fi == fen_info->fi) {
                        return fib_entry;
                }
        }

        return NULL;
}

static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
        .key_offset = offsetof(struct mlxsw_sp_fib_node, key),
        .head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
        .key_len = sizeof(struct mlxsw_sp_fib_key),
        .automatic_shrinking = true,
};

static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
                                    struct mlxsw_sp_fib_node *fib_node)
{
        return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
                                      mlxsw_sp_fib_ht_params);
}
static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
				    struct mlxsw_sp_fib_node *fib_node)
{
	return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
				      mlxsw_sp_fib_ht_params);
}

static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
				     struct mlxsw_sp_fib_node *fib_node)
{
	rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
			       mlxsw_sp_fib_ht_params);
}

static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
			 size_t addr_len, unsigned char prefix_len)
{
	struct mlxsw_sp_fib_key key;

	memset(&key, 0, sizeof(key));
	memcpy(key.addr, addr, addr_len);
	key.prefix_len = prefix_len;
	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
}

static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_create(struct mlxsw_sp_vr *vr, const void *addr,
			 size_t addr_len, unsigned char prefix_len)
{
	struct mlxsw_sp_fib_node *fib_node;

	fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
	if (!fib_node)
		return NULL;

	INIT_LIST_HEAD(&fib_node->entry_list);
	list_add(&fib_node->list, &vr->fib->node_list);
	memcpy(fib_node->key.addr, addr, addr_len);
	fib_node->key.prefix_len = prefix_len;
	mlxsw_sp_fib_node_insert(vr->fib, fib_node);
	fib_node->vr = vr;

	return fib_node;
}

static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
{
	mlxsw_sp_fib_node_remove(fib_node->vr->fib, fib_node);
	list_del(&fib_node->list);
	WARN_ON(!list_empty(&fib_node->entry_list));
	kfree(fib_node);
}

static bool
mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
				 const struct mlxsw_sp_fib_entry *fib_entry)
{
	return list_first_entry(&fib_node->entry_list,
				struct mlxsw_sp_fib_entry, list) == fib_entry;
}

static void mlxsw_sp_fib_node_prefix_inc(struct mlxsw_sp_fib_node *fib_node)
{
	unsigned char prefix_len = fib_node->key.prefix_len;
	struct mlxsw_sp_fib *fib = fib_node->vr->fib;

	if (fib->prefix_ref_count[prefix_len]++ == 0)
		mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len);
}

static void mlxsw_sp_fib_node_prefix_dec(struct mlxsw_sp_fib_node *fib_node)
{
	unsigned char prefix_len = fib_node->key.prefix_len;
	struct mlxsw_sp_fib *fib = fib_node->vr->fib;

	if (--fib->prefix_ref_count[prefix_len] == 0)
		mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len);
}

static struct mlxsw_sp_fib_node *
mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp,
		       const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib_node *fib_node;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->dst_len, fen_info->tb_id,
			     MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(vr))
		return ERR_CAST(vr);

	fib_node = mlxsw_sp_fib_node_lookup(vr->fib, &fen_info->dst,
					    sizeof(fen_info->dst),
					    fen_info->dst_len);
	if (fib_node)
		return fib_node;

	fib_node = mlxsw_sp_fib_node_create(vr, &fen_info->dst,
					    sizeof(fen_info->dst),
					    fen_info->dst_len);
	if (!fib_node) {
		err = -ENOMEM;
		goto err_fib_node_create;
	}

	return fib_node;

err_fib_node_create:
	mlxsw_sp_vr_put(mlxsw_sp, vr);
	return ERR_PTR(err);
}

static void mlxsw_sp_fib4_node_put(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_vr *vr = fib_node->vr;

	if (!list_empty(&fib_node->entry_list))
		return;
	mlxsw_sp_fib_node_destroy(fib_node);
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}
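/* Entries within a node are kept sorted: by table ID (descending), then
 * TOS (descending), then priority (ascending). The helper below returns
 * the entry before which a new entry with the given parameters should be
 * inserted (for a replace, this is the replaced entry). A NULL return
 * means no such entry exists and the caller falls back to positioning
 * the new entry by table ID alone.
 */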
static struct mlxsw_sp_fib_entry *
mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
			      const struct mlxsw_sp_fib_entry_params *params)
{
	struct mlxsw_sp_fib_entry *fib_entry;

	list_for_each_entry(fib_entry, &fib_node->entry_list, list) {
		if (fib_entry->params.tb_id > params->tb_id)
			continue;
		if (fib_entry->params.tb_id != params->tb_id)
			break;
		if (fib_entry->params.tos > params->tos)
			continue;
		if (fib_entry->params.prio >= params->prio ||
		    fib_entry->params.tos < params->tos)
			return fib_entry;
	}

	return NULL;
}

static int mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib_entry *fib_entry,
					  struct mlxsw_sp_fib_entry *new_entry)
{
	struct mlxsw_sp_fib_node *fib_node;

	if (WARN_ON(!fib_entry))
		return -EINVAL;

	fib_node = fib_entry->fib_node;
	list_for_each_entry_from(fib_entry, &fib_node->entry_list, list) {
		if (fib_entry->params.tb_id != new_entry->params.tb_id ||
		    fib_entry->params.tos != new_entry->params.tos ||
		    fib_entry->params.prio != new_entry->params.prio)
			break;
	}

	list_add_tail(&new_entry->list, &fib_entry->list);
	return 0;
}

static int
mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib_node *fib_node,
			       struct mlxsw_sp_fib_entry *new_entry,
			       bool replace, bool append)
{
	struct mlxsw_sp_fib_entry *fib_entry;

	fib_entry = mlxsw_sp_fib4_node_entry_find(fib_node, &new_entry->params);

	if (append)
		return mlxsw_sp_fib4_node_list_append(fib_entry, new_entry);
	if (replace && WARN_ON(!fib_entry))
		return -EINVAL;

	/* Insert new entry before replaced one, so that we can later
	 * remove the second.
	 */
	if (fib_entry) {
		list_add_tail(&new_entry->list, &fib_entry->list);
	} else {
		struct mlxsw_sp_fib_entry *last;

		list_for_each_entry(last, &fib_node->entry_list, list) {
			if (new_entry->params.tb_id > last->params.tb_id)
				break;
			fib_entry = last;
		}

		if (fib_entry)
			list_add(&new_entry->list, &fib_entry->list);
		else
			list_add(&new_entry->list, &fib_node->entry_list);
	}

	return 0;
}

static void
mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib_entry *fib_entry)
{
	list_del(&fib_entry->list);
}
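/* Only the first entry on a node's list is reflected in the device's
 * table. The helpers below write or delete that entry and, on deletion,
 * promote its successor in its place.
 */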
static int
mlxsw_sp_fib4_node_entry_add(struct mlxsw_sp *mlxsw_sp,
			     const struct mlxsw_sp_fib_node *fib_node,
			     struct mlxsw_sp_fib_entry *fib_entry)
{
	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
		return 0;

	/* To prevent packet loss, overwrite the previously offloaded
	 * entry.
	 */
	if (!list_is_singular(&fib_node->entry_list)) {
		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);

		mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
	}

	return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
}

static void
mlxsw_sp_fib4_node_entry_del(struct mlxsw_sp *mlxsw_sp,
			     const struct mlxsw_sp_fib_node *fib_node,
			     struct mlxsw_sp_fib_entry *fib_entry)
{
	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
		return;

	/* Promote the next entry by overwriting the deleted entry */
	if (!list_is_singular(&fib_node->entry_list)) {
		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;

		mlxsw_sp_fib_entry_update(mlxsw_sp, n);
		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
		return;
	}

	mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
}

static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib_entry *fib_entry,
					 bool replace, bool append)
{
	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
	int err;

	err = mlxsw_sp_fib4_node_list_insert(fib_node, fib_entry, replace,
					     append);
	if (err)
		return err;

	err = mlxsw_sp_fib4_node_entry_add(mlxsw_sp, fib_node, fib_entry);
	if (err)
		goto err_fib4_node_entry_add;

	mlxsw_sp_fib_node_prefix_inc(fib_node);

	return 0;

err_fib4_node_entry_add:
	mlxsw_sp_fib4_node_list_remove(fib_entry);
	return err;
}

static void
mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;

	mlxsw_sp_fib_node_prefix_dec(fib_node);
	mlxsw_sp_fib4_node_entry_del(mlxsw_sp, fib_node, fib_entry);
	mlxsw_sp_fib4_node_list_remove(fib_entry);
}

static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry,
					bool replace)
{
	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
	struct mlxsw_sp_fib_entry *replaced;

	if (!replace)
		return;

	/* We inserted the new entry before replaced one */
	replaced = list_next_entry(fib_entry, list);

	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
	mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
}
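/* Route addition: resolve or create the FIB node for the prefix, create
 * the entry, link it into the node's sorted list and, if it became the
 * first entry, into the device's table. For a replace, the old entry is
 * unlinked and destroyed afterwards.
 */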
static int
mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
			 const struct fib_entry_notifier_info *fen_info,
			 bool replace, bool append)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_fib_node *fib_node;
	int err;

	if (mlxsw_sp->router.aborted)
		return 0;

	fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info);
	if (IS_ERR(fib_node)) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
		return PTR_ERR(fib_node);
	}

	fib_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
	if (IS_ERR(fib_entry)) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
		err = PTR_ERR(fib_entry);
		goto err_fib4_entry_create;
	}

	err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib_entry, replace,
					    append);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
		goto err_fib4_node_entry_link;
	}

	mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib_entry, replace);

	return 0;

err_fib4_node_entry_link:
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
err_fib4_entry_create:
	mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
	return err;
}

static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
				     struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_fib_node *fib_node;

	if (mlxsw_sp->router.aborted)
		return;

	fib_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
	if (WARN_ON(!fib_entry))
		return;
	fib_node = fib_entry->fib_node;

	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry);
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
	mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
}

static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];
	char ralst_pl[MLXSW_REG_RALST_LEN];
	char raltb_pl[MLXSW_REG_RALTB_LEN];
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	int err;

	mlxsw_reg_ralta_pack(ralta_pl, true, MLXSW_REG_RALXX_PROTOCOL_IPV4,
			     MLXSW_SP_LPM_TREE_MIN);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
	if (err)
		return err;

	mlxsw_reg_ralst_pack(ralst_pl, 0xff, MLXSW_SP_LPM_TREE_MIN);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
	if (err)
		return err;

	mlxsw_reg_raltb_pack(raltb_pl, 0, MLXSW_REG_RALXX_PROTOCOL_IPV4,
			     MLXSW_SP_LPM_TREE_MIN);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
	if (err)
		return err;

	mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4,
			      MLXSW_REG_RALUE_OP_WRITE_WRITE, 0, 0, 0);
	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
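/* Flush helpers: tear down every entry hanging off a FIB node, and with
 * it the node itself, or, in mlxsw_sp_router_fib_flush(), every node of
 * every active virtual router.
 */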
static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_fib_entry *fib_entry, *tmp;

	list_for_each_entry_safe(fib_entry, tmp, &fib_node->entry_list, list) {
		bool do_break = &tmp->list == &fib_node->entry_list;

		mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry);
		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
		mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
		/* Break when entry list is empty and node was freed.
		 * Otherwise, we'll access freed memory in the next
		 * iteration.
		 */
		if (do_break)
			break;
	}
}

static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_fib_node *fib_node)
{
	switch (fib_node->vr->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		WARN_ON_ONCE(1);
		break;
	}
}

static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_fib_node *fib_node, *tmp;
	struct mlxsw_sp_vr *vr;
	int i;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router.vrs[i];

		if (!vr->used)
			continue;

		list_for_each_entry_safe(fib_node, tmp, &vr->fib->node_list,
					 list) {
			bool do_break = &tmp->list == &vr->fib->node_list;

			mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
			if (do_break)
				break;
		}
	}
}

static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp)
{
	int err;

	if (mlxsw_sp->router.aborted)
		return;
	dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
	mlxsw_sp_router_fib_flush(mlxsw_sp);
	mlxsw_sp->router.aborted = true;
	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
	if (err)
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
}

static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
{
	char ritr_pl[MLXSW_REG_RITR_LEN];
	int err;

	mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
	if (WARN_ON_ONCE(err))
		return err;

	mlxsw_reg_ritr_enable_set(ritr_pl, false);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}

void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_rif *r)
{
	mlxsw_sp_router_rif_disable(mlxsw_sp, r->rif);
	mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, r);
	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, r);
}

static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
	char rgcr_pl[MLXSW_REG_RGCR_LEN];
	u64 max_rifs;
	int err;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
		return -EIO;

	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
	mlxsw_sp->rifs = kcalloc(max_rifs, sizeof(struct mlxsw_sp_rif *),
				 GFP_KERNEL);
	if (!mlxsw_sp->rifs)
		return -ENOMEM;

	mlxsw_reg_rgcr_pack(rgcr_pl, true);
	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
	if (err)
		goto err_rgcr_fail;

	return 0;

err_rgcr_fail:
	kfree(mlxsw_sp->rifs);
	return err;
}

static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	char rgcr_pl[MLXSW_REG_RGCR_LEN];
	int i;

	mlxsw_reg_rgcr_pack(rgcr_pl, false);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
		WARN_ON_ONCE(mlxsw_sp->rifs[i]);

	kfree(mlxsw_sp->rifs);
}
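/* FIB notifications arrive in atomic context, so the notifier copies the
 * relevant info into a work item and defers processing to process
 * context, where internal structures can be modified under RTNL.
 */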
struct mlxsw_sp_fib_event_work {
	struct work_struct work;
	union {
		struct fib_entry_notifier_info fen_info;
		struct fib_nh_notifier_info fnh_info;
	};
	struct mlxsw_sp *mlxsw_sp;
	unsigned long event;
};

static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
{
	struct mlxsw_sp_fib_event_work *fib_work =
		container_of(work, struct mlxsw_sp_fib_event_work, work);
	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
	bool replace, append;
	int err;

	/* Protect internal structures from changes */
	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_APPEND: /* fall through */
	case FIB_EVENT_ENTRY_ADD:
		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
		append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
		err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
					       replace, append);
		if (err)
			mlxsw_sp_router_fib4_abort(mlxsw_sp);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_ENTRY_DEL:
		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_RULE_ADD: /* fall through */
	case FIB_EVENT_RULE_DEL:
		mlxsw_sp_router_fib4_abort(mlxsw_sp);
		break;
	case FIB_EVENT_NH_ADD: /* fall through */
	case FIB_EVENT_NH_DEL:
		mlxsw_sp_nexthop_event(mlxsw_sp, fib_work->event,
				       fib_work->fnh_info.fib_nh);
		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	}
	rtnl_unlock();
	kfree(fib_work);
}

/* Called with rcu_read_lock() */
static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
				     unsigned long event, void *ptr)
{
	struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
	struct mlxsw_sp_fib_event_work *fib_work;
	struct fib_notifier_info *info = ptr;

	if (!net_eq(info->net, &init_net))
		return NOTIFY_DONE;

	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
	if (WARN_ON(!fib_work))
		return NOTIFY_BAD;

	INIT_WORK(&fib_work->work, mlxsw_sp_router_fib_event_work);
	fib_work->mlxsw_sp = mlxsw_sp;
	fib_work->event = event;

	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_APPEND: /* fall through */
	case FIB_EVENT_ENTRY_ADD: /* fall through */
	case FIB_EVENT_ENTRY_DEL:
		memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info));
		/* Take reference on fib_info to prevent it from being
		 * freed while work is queued. Release it afterwards.
		 */
		fib_info_hold(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD: /* fall through */
	case FIB_EVENT_NH_DEL:
		memcpy(&fib_work->fnh_info, ptr, sizeof(fib_work->fnh_info));
		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	}

	mlxsw_core_schedule_work(&fib_work->work);

	return NOTIFY_DONE;
}
2604 */ 2605 mlxsw_core_flush_owq(); 2606 mlxsw_sp_router_fib_flush(mlxsw_sp); 2607 } 2608 2609 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) 2610 { 2611 int err; 2612 2613 INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list); 2614 err = __mlxsw_sp_router_init(mlxsw_sp); 2615 if (err) 2616 return err; 2617 2618 err = rhashtable_init(&mlxsw_sp->router.nexthop_ht, 2619 &mlxsw_sp_nexthop_ht_params); 2620 if (err) 2621 goto err_nexthop_ht_init; 2622 2623 err = rhashtable_init(&mlxsw_sp->router.nexthop_group_ht, 2624 &mlxsw_sp_nexthop_group_ht_params); 2625 if (err) 2626 goto err_nexthop_group_ht_init; 2627 2628 mlxsw_sp_lpm_init(mlxsw_sp); 2629 err = mlxsw_sp_vrs_init(mlxsw_sp); 2630 if (err) 2631 goto err_vrs_init; 2632 2633 err = mlxsw_sp_neigh_init(mlxsw_sp); 2634 if (err) 2635 goto err_neigh_init; 2636 2637 mlxsw_sp->fib_nb.notifier_call = mlxsw_sp_router_fib_event; 2638 err = register_fib_notifier(&mlxsw_sp->fib_nb, 2639 mlxsw_sp_router_fib_dump_flush); 2640 if (err) 2641 goto err_register_fib_notifier; 2642 2643 return 0; 2644 2645 err_register_fib_notifier: 2646 mlxsw_sp_neigh_fini(mlxsw_sp); 2647 err_neigh_init: 2648 mlxsw_sp_vrs_fini(mlxsw_sp); 2649 err_vrs_init: 2650 rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht); 2651 err_nexthop_group_ht_init: 2652 rhashtable_destroy(&mlxsw_sp->router.nexthop_ht); 2653 err_nexthop_ht_init: 2654 __mlxsw_sp_router_fini(mlxsw_sp); 2655 return err; 2656 } 2657 2658 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) 2659 { 2660 unregister_fib_notifier(&mlxsw_sp->fib_nb); 2661 mlxsw_sp_neigh_fini(mlxsw_sp); 2662 mlxsw_sp_vrs_fini(mlxsw_sp); 2663 rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht); 2664 rhashtable_destroy(&mlxsw_sp->router.nexthop_ht); 2665 __mlxsw_sp_router_fini(mlxsw_sp); 2666 } 2667