/*
 * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
 * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
 * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
 * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/rhashtable.h>
#include <linux/bitops.h>
#include <linux/in6.h>
#include <linux/notifier.h>
#include <net/netevent.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/ip_fib.h>

#include "spectrum.h"
#include "core.h"
#include "reg.h"

#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)

static bool
mlxsw_sp_prefix_usage_subset(struct mlxsw_sp_prefix_usage *prefix_usage1,
			     struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	unsigned char prefix;

	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage1) {
		if (!test_bit(prefix, prefix_usage2->b))
			return false;
	}
	return true;
}

static bool
mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
			 struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}

static bool
mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage)
{
	struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 }};

	return mlxsw_sp_prefix_usage_eq(prefix_usage, &prefix_usage_none);
}

static void
mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
			  struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}

static void
mlxsw_sp_prefix_usage_zero(struct mlxsw_sp_prefix_usage *prefix_usage)
{
	memset(prefix_usage, 0, sizeof(*prefix_usage));
}

static void
mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
			  unsigned char prefix_len)
{
	set_bit(prefix_len, prefix_usage->b);
}

static void
mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
			    unsigned char prefix_len)
{
	clear_bit(prefix_len, prefix_usage->b);
}

struct mlxsw_sp_fib_key {
	struct net_device *dev;
	unsigned char addr[sizeof(struct in6_addr)];
	unsigned char prefix_len;
};

enum mlxsw_sp_fib_entry_type {
	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
};

struct mlxsw_sp_nexthop_group;

struct mlxsw_sp_fib_entry {
	struct rhash_head ht_node;
	struct list_head list;
	struct mlxsw_sp_fib_key key;
	enum mlxsw_sp_fib_entry_type type;
	unsigned int ref_count;
	u16 rif; /* used for action local */
	struct mlxsw_sp_vr *vr;
	struct fib_info *fi;
	struct list_head nexthop_group_node;
	struct mlxsw_sp_nexthop_group *nh_group;
};

struct mlxsw_sp_fib {
	struct rhashtable ht;
	struct list_head entry_list;
	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
	struct mlxsw_sp_prefix_usage prefix_usage;
};

static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_fib_entry, key),
	.head_offset = offsetof(struct mlxsw_sp_fib_entry, ht_node),
	.key_len = sizeof(struct mlxsw_sp_fib_key),
	.automatic_shrinking = true,
};

static int mlxsw_sp_fib_entry_insert(struct mlxsw_sp_fib *fib,
				     struct mlxsw_sp_fib_entry *fib_entry)
{
	unsigned char prefix_len = fib_entry->key.prefix_len;
	int err;

	err = rhashtable_insert_fast(&fib->ht, &fib_entry->ht_node,
				     mlxsw_sp_fib_ht_params);
	if (err)
		return err;
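	/* Track how many entries use each prefix length; the set of used
	 * lengths (prefix_usage) later drives which LPM tree layout the
	 * virtual router needs.
	 */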
	list_add_tail(&fib_entry->list, &fib->entry_list);
	if (fib->prefix_ref_count[prefix_len]++ == 0)
		mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len);
	return 0;
}

static void mlxsw_sp_fib_entry_remove(struct mlxsw_sp_fib *fib,
				      struct mlxsw_sp_fib_entry *fib_entry)
{
	unsigned char prefix_len = fib_entry->key.prefix_len;

	if (--fib->prefix_ref_count[prefix_len] == 0)
		mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len);
	list_del(&fib_entry->list);
	rhashtable_remove_fast(&fib->ht, &fib_entry->ht_node,
			       mlxsw_sp_fib_ht_params);
}

static struct mlxsw_sp_fib_entry *
mlxsw_sp_fib_entry_create(struct mlxsw_sp_fib *fib, const void *addr,
			  size_t addr_len, unsigned char prefix_len,
			  struct net_device *dev)
{
	struct mlxsw_sp_fib_entry *fib_entry;

	fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL);
	if (!fib_entry)
		return NULL;
	fib_entry->key.dev = dev;
	memcpy(fib_entry->key.addr, addr, addr_len);
	fib_entry->key.prefix_len = prefix_len;
	return fib_entry;
}

static void mlxsw_sp_fib_entry_destroy(struct mlxsw_sp_fib_entry *fib_entry)
{
	kfree(fib_entry);
}

static struct mlxsw_sp_fib_entry *
mlxsw_sp_fib_entry_lookup(struct mlxsw_sp_fib *fib, const void *addr,
			  size_t addr_len, unsigned char prefix_len,
			  struct net_device *dev)
{
	struct mlxsw_sp_fib_key key;

	memset(&key, 0, sizeof(key));
	key.dev = dev;
	memcpy(key.addr, addr, addr_len);
	key.prefix_len = prefix_len;
	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
}

static struct mlxsw_sp_fib *mlxsw_sp_fib_create(void)
{
	struct mlxsw_sp_fib *fib;
	int err;

	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
	if (!fib)
		return ERR_PTR(-ENOMEM);
	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
	if (err)
		goto err_rhashtable_init;
	INIT_LIST_HEAD(&fib->entry_list);
	return fib;

err_rhashtable_init:
	kfree(fib);
	return ERR_PTR(err);
}

static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib)
{
	rhashtable_destroy(&fib->ht);
	kfree(fib);
}

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp, bool one_reserved)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int i;

	for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
		lpm_tree = &mlxsw_sp->router.lpm_trees[i];
		if (lpm_tree->ref_count == 0) {
			if (one_reserved)
				one_reserved = false;
			else
				return lpm_tree;
		}
	}
	return NULL;
}

static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];

	mlxsw_reg_ralta_pack(ralta_pl, true,
			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
			     lpm_tree->id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}

static int mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];

	mlxsw_reg_ralta_pack(ralta_pl, false,
			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
			     lpm_tree->id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}

static int
mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_prefix_usage *prefix_usage,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
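	/* The RALST register encodes the tree as prefix-length bins: the
	 * root bin is the longest used prefix length, and each used length
	 * is chained to the previously packed, shorter one as its left
	 * child.
	 */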
	char ralst_pl[MLXSW_REG_RALST_LEN];
	u8 root_bin = 0;
	u8 prefix;
	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;

	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
		root_bin = prefix;

	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
		if (prefix == 0)
			continue;
		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
					 MLXSW_REG_RALST_BIN_NO_CHILD);
		last_prefix = prefix;
	}
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
}

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
			 struct mlxsw_sp_prefix_usage *prefix_usage,
			 enum mlxsw_sp_l3proto proto, bool one_reserved)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int err;

	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp, one_reserved);
	if (!lpm_tree)
		return ERR_PTR(-EBUSY);
	lpm_tree->proto = proto;
	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
	if (err)
		return ERR_PTR(err);

	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
						lpm_tree);
	if (err)
		goto err_left_struct_set;
	memcpy(&lpm_tree->prefix_usage, prefix_usage,
	       sizeof(lpm_tree->prefix_usage));
	return lpm_tree;

err_left_struct_set:
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
	return ERR_PTR(err);
}

static int mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_lpm_tree *lpm_tree)
{
	return mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
		      struct mlxsw_sp_prefix_usage *prefix_usage,
		      enum mlxsw_sp_l3proto proto, bool one_reserved)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int i;

	for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
		lpm_tree = &mlxsw_sp->router.lpm_trees[i];
		if (lpm_tree->ref_count != 0 &&
		    lpm_tree->proto == proto &&
		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
					     prefix_usage))
			goto inc_ref_count;
	}
	lpm_tree = mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage,
					    proto, one_reserved);
	if (IS_ERR(lpm_tree))
		return lpm_tree;

inc_ref_count:
	lpm_tree->ref_count++;
	return lpm_tree;
}

static int mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_lpm_tree *lpm_tree)
{
	if (--lpm_tree->ref_count == 0)
		return mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
	return 0;
}

static void mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int i;

	for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
		lpm_tree = &mlxsw_sp->router.lpm_trees[i];
		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
	}
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_resources *resources;
	struct mlxsw_sp_vr *vr;
	int i;

	resources = mlxsw_core_resources_get(mlxsw_sp->core);
	for (i = 0; i < resources->max_virtual_routers; i++) {
		vr = &mlxsw_sp->router.vrs[i];
		if (!vr->used)
			return vr;
	}
	return NULL;
}

static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_vr *vr)
{
	char raltb_pl[MLXSW_REG_RALTB_LEN];

	mlxsw_reg_raltb_pack(raltb_pl, vr->id,
			     (enum mlxsw_reg_ralxx_protocol) vr->proto,
			     vr->lpm_tree->id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}
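
/* A virtual router is always bound to some LPM tree; rebinding it is how
 * it migrates to a tree with a different prefix usage set.
 */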
static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_vr *vr)
{
	char raltb_pl[MLXSW_REG_RALTB_LEN];

	/* Bind to tree 0 which is default */
	mlxsw_reg_raltb_pack(raltb_pl, vr->id,
			     (enum mlxsw_reg_ralxx_protocol) vr->proto, 0);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}

static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
{
	/* For our purpose, squash main and local table into one */
	if (tb_id == RT_TABLE_LOCAL)
		tb_id = RT_TABLE_MAIN;
	return tb_id;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
					    u32 tb_id,
					    enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_resources *resources;
	struct mlxsw_sp_vr *vr;
	int i;

	tb_id = mlxsw_sp_fix_tb_id(tb_id);

	resources = mlxsw_core_resources_get(mlxsw_sp->core);
	for (i = 0; i < resources->max_virtual_routers; i++) {
		vr = &mlxsw_sp->router.vrs[i];
		if (vr->used && vr->proto == proto && vr->tb_id == tb_id)
			return vr;
	}
	return NULL;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
					      unsigned char prefix_len,
					      u32 tb_id,
					      enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_prefix_usage req_prefix_usage;
	struct mlxsw_sp_lpm_tree *lpm_tree;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
	if (!vr)
		return ERR_PTR(-EBUSY);
	vr->fib = mlxsw_sp_fib_create();
	if (IS_ERR(vr->fib))
		return ERR_CAST(vr->fib);

	vr->proto = proto;
	vr->tb_id = tb_id;
	mlxsw_sp_prefix_usage_zero(&req_prefix_usage);
	mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 proto, true);
	if (IS_ERR(lpm_tree)) {
		err = PTR_ERR(lpm_tree);
		goto err_tree_get;
	}
	vr->lpm_tree = lpm_tree;
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
	if (err)
		goto err_tree_bind;

	vr->used = true;
	return vr;

err_tree_bind:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
err_tree_get:
	mlxsw_sp_fib_destroy(vr->fib);

	return ERR_PTR(err);
}

static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_vr *vr)
{
	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr);
	mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
	mlxsw_sp_fib_destroy(vr->fib);
	vr->used = false;
}

static int
mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
			   struct mlxsw_sp_prefix_usage *req_prefix_usage)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;

	if (mlxsw_sp_prefix_usage_eq(req_prefix_usage,
				     &vr->lpm_tree->prefix_usage))
		return 0;

	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage,
					 vr->proto, false);
	if (IS_ERR(lpm_tree)) {
		/* We failed to get a tree according to the required
		 * prefix usage. However, the current tree might still be
		 * good for us if our requirement is a subset of the
		 * prefixes used in the tree.
		 */
		if (mlxsw_sp_prefix_usage_subset(req_prefix_usage,
						 &vr->lpm_tree->prefix_usage))
			return 0;
		return PTR_ERR(lpm_tree);
	}

	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr);
	mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
	vr->lpm_tree = lpm_tree;
	return mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp,
					   unsigned char prefix_len,
					   u32 tb_id,
					   enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_vr *vr;
	int err;

	tb_id = mlxsw_sp_fix_tb_id(tb_id);
	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id, proto);
	if (!vr) {
		vr = mlxsw_sp_vr_create(mlxsw_sp, prefix_len, tb_id, proto);
		if (IS_ERR(vr))
			return vr;
	} else {
		struct mlxsw_sp_prefix_usage req_prefix_usage;

		mlxsw_sp_prefix_usage_cpy(&req_prefix_usage,
					  &vr->fib->prefix_usage);
		mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
		/* Need to replace LPM tree in case new prefix is required. */
		err = mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
						 &req_prefix_usage);
		if (err)
			return ERR_PTR(err);
	}
	return vr;
}

static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
{
	/* Destroy virtual router entity in case the associated FIB is empty
	 * and allow it to be used for other tables in future. Otherwise,
	 * check whether some prefix usage disappeared and change the tree if
	 * that is the case. Note that in case a new, smaller tree cannot be
	 * allocated, the original one will be kept being used.
	 */
	if (mlxsw_sp_prefix_usage_none(&vr->fib->prefix_usage))
		mlxsw_sp_vr_destroy(mlxsw_sp, vr);
	else
		mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
					   &vr->fib->prefix_usage);
}

static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_resources *resources;
	struct mlxsw_sp_vr *vr;
	int i;

	resources = mlxsw_core_resources_get(mlxsw_sp->core);
	if (!resources->max_virtual_routers_valid)
		return -EIO;

	mlxsw_sp->router.vrs = kcalloc(resources->max_virtual_routers,
				       sizeof(struct mlxsw_sp_vr),
				       GFP_KERNEL);
	if (!mlxsw_sp->router.vrs)
		return -ENOMEM;

	for (i = 0; i < resources->max_virtual_routers; i++) {
		vr = &mlxsw_sp->router.vrs[i];
		vr->id = i;
	}

	return 0;
}

static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
{
	kfree(mlxsw_sp->router.vrs);
}

struct mlxsw_sp_neigh_key {
	unsigned char addr[sizeof(struct in6_addr)];
	struct net_device *dev;
};

struct mlxsw_sp_neigh_entry {
	struct rhash_head ht_node;
	struct mlxsw_sp_neigh_key key;
	u16 rif;
	struct neighbour *n;
	bool offloaded;
	struct delayed_work dw;
	struct mlxsw_sp_port *mlxsw_sp_port;
	unsigned char ha[ETH_ALEN];
	struct list_head nexthop_list; /* list of nexthops using
					* this neigh entry
					*/
	struct list_head nexthop_neighs_list_node;
};

static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
	.key_len = sizeof(struct mlxsw_sp_neigh_key),
};

static int
mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return rhashtable_insert_fast(&mlxsw_sp->router.neigh_ht,
				      &neigh_entry->ht_node,
				      mlxsw_sp_neigh_ht_params);
}
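
/* Removal only unlinks the entry from the hash table; freeing it and any
 * hardware cleanup are left to the callers.
 */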
static void
mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	rhashtable_remove_fast(&mlxsw_sp->router.neigh_ht,
			       &neigh_entry->ht_node,
			       mlxsw_sp_neigh_ht_params);
}

static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work);

static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_create(const void *addr, size_t addr_len,
			    struct net_device *dev, u16 rif,
			    struct neighbour *n)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;

	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_ATOMIC);
	if (!neigh_entry)
		return NULL;
	memcpy(neigh_entry->key.addr, addr, addr_len);
	neigh_entry->key.dev = dev;
	neigh_entry->rif = rif;
	neigh_entry->n = n;
	INIT_DELAYED_WORK(&neigh_entry->dw, mlxsw_sp_router_neigh_update_hw);
	INIT_LIST_HEAD(&neigh_entry->nexthop_list);
	return neigh_entry;
}

static void
mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	kfree(neigh_entry);
}

static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, const void *addr,
			    size_t addr_len, struct net_device *dev)
{
	struct mlxsw_sp_neigh_key key = {{ 0 }};

	memcpy(key.addr, addr, addr_len);
	key.dev = dev;
	return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht,
				      &key, mlxsw_sp_neigh_ht_params);
}

int mlxsw_sp_router_neigh_construct(struct net_device *dev,
				    struct neighbour *n)
{
	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp_rif *r;
	u32 dip;
	int err;

	if (n->tbl != &arp_tbl)
		return 0;

	dip = ntohl(*((__be32 *) n->primary_key));
	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &dip, sizeof(dip),
						  n->dev);
	if (neigh_entry) {
		WARN_ON(neigh_entry->n != n);
		return 0;
	}

	r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
	if (WARN_ON(!r))
		return -EINVAL;

	neigh_entry = mlxsw_sp_neigh_entry_create(&dip, sizeof(dip), n->dev,
						  r->rif, n);
	if (!neigh_entry)
		return -ENOMEM;
	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
	if (err)
		goto err_neigh_entry_insert;
	return 0;

err_neigh_entry_insert:
	mlxsw_sp_neigh_entry_destroy(neigh_entry);
	return err;
}

void mlxsw_sp_router_neigh_destroy(struct net_device *dev,
				   struct neighbour *n)
{
	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
	struct mlxsw_sp_neigh_entry *neigh_entry;
	u32 dip;

	if (n->tbl != &arp_tbl)
		return;

	dip = ntohl(*((__be32 *) n->primary_key));
	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &dip, sizeof(dip),
						  n->dev);
	if (!neigh_entry)
		return;
	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_destroy(neigh_entry);
}

static void
mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
{
	unsigned long interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);

	mlxsw_sp->router.neighs_update.interval = jiffies_to_msecs(interval);
}

static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int ent_index)
{
	struct net_device *dev;
	struct neighbour *n;
	__be32 dipn;
	u32 dip;
	u16 rif;

	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);

	if (!mlxsw_sp->rifs[rif]) {
		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
		return;
	}

	dipn = htonl(dip);
	dev = mlxsw_sp->rifs[rif]->dev;
	n = neigh_lookup(&arp_tbl, &dipn, dev);
	if (!n) {
		netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n",
			   &dip);
		return;
	}

	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
	neigh_event_send(n, NULL);
	neigh_release(n);
}

static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
	u8 num_entries;
	int i;

	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
								rec_index);
	/* Hardware starts counting at 0, so add 1. */
	num_entries++;

	/* Each record consists of several neighbour entries. */
	for (i = 0; i < num_entries; i++) {
		int ent_index;

		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
						       ent_index);
	}
}

static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
					      char *rauhtd_pl, int rec_index)
{
	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
	case MLXSW_REG_RAUHTD_TYPE_IPV4:
		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
						       rec_index);
		break;
	case MLXSW_REG_RAUHTD_TYPE_IPV6:
		WARN_ON_ONCE(1);
		break;
	}
}

static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
{
	char *rauhtd_pl;
	u8 num_rec;
	int i, err;

	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
	if (!rauhtd_pl)
		return -ENOMEM;

	/* Make sure the neighbour's netdev isn't removed in the
	 * process.
	 */
	rtnl_lock();
	do {
		mlxsw_reg_rauhtd_pack(rauhtd_pl, MLXSW_REG_RAUHTD_TYPE_IPV4);
		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
				      rauhtd_pl);
		if (err) {
			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
			break;
		}
		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
		for (i = 0; i < num_rec; i++)
			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
							  i);
	} while (num_rec);
	rtnl_unlock();

	kfree(rauhtd_pl);
	return err;
}

static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;

	/* Take RTNL mutex here to prevent lists from changes */
	rtnl_lock();
	list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
			    nexthop_neighs_list_node) {
		/* If this neigh has nexthops, make the kernel think it
		 * is active regardless of the traffic.
		 */
		if (!list_empty(&neigh_entry->nexthop_list))
			neigh_event_send(neigh_entry->n, NULL);
	}
	rtnl_unlock();
}

static void
mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
{
	unsigned long interval = mlxsw_sp->router.neighs_update.interval;

	mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw,
			       msecs_to_jiffies(interval));
}

static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
{
	struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
						 router.neighs_update.dw.work);
	int err;

	err = mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp);
	if (err)
		dev_err(mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");

	mlxsw_sp_router_neighs_update_nh(mlxsw_sp);

	mlxsw_sp_router_neighs_update_work_schedule(mlxsw_sp);
}

static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
						 router.nexthop_probe_dw.work);

	/* Iterate over nexthop neighbours, find those who are unresolved and
	 * send ARP on them. This solves the chicken-and-egg problem where
	 * the nexthop wouldn't get offloaded until the neighbour is resolved,
	 * but it would never get resolved if traffic is flowing in HW
	 * using a different nexthop.
	 *
	 * Take RTNL mutex here to prevent lists from changes.
	 */
	rtnl_lock();
	list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
			    nexthop_neighs_list_node) {
		if (!(neigh_entry->n->nud_state & NUD_VALID) &&
		    !list_empty(&neigh_entry->nexthop_list))
			neigh_event_send(neigh_entry->n, NULL);
	}
	rtnl_unlock();

	mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw,
			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
}

static void
mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
			      struct mlxsw_sp_neigh_entry *neigh_entry,
			      bool removing);

static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work)
{
	struct mlxsw_sp_neigh_entry *neigh_entry =
		container_of(work, struct mlxsw_sp_neigh_entry, dw.work);
	struct neighbour *n = neigh_entry->n;
	struct mlxsw_sp_port *mlxsw_sp_port = neigh_entry->mlxsw_sp_port;
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
	char rauht_pl[MLXSW_REG_RAUHT_LEN];
	struct net_device *dev;
	bool entry_connected;
	u8 nud_state;
	bool updating;
	bool removing;
	bool adding;
	u32 dip;
	int err;

	read_lock_bh(&n->lock);
	dip = ntohl(*((__be32 *) n->primary_key));
	memcpy(neigh_entry->ha, n->ha, sizeof(neigh_entry->ha));
	nud_state = n->nud_state;
	dev = n->dev;
	read_unlock_bh(&n->lock);

	entry_connected = nud_state & NUD_VALID;
	adding = (!neigh_entry->offloaded) && entry_connected;
	updating = neigh_entry->offloaded && entry_connected;
	removing = neigh_entry->offloaded && !entry_connected;

	if (adding || updating) {
		mlxsw_reg_rauht_pack4(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_ADD,
				      neigh_entry->rif,
				      neigh_entry->ha, dip);
		err = mlxsw_reg_write(mlxsw_sp->core,
				      MLXSW_REG(rauht), rauht_pl);
		if (err) {
			netdev_err(dev, "Could not add neigh %pI4h\n", &dip);
			neigh_entry->offloaded = false;
		} else {
			neigh_entry->offloaded = true;
		}
		mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, false);
	} else if (removing) {
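		/* Neigh is no longer valid; remove its host entry from the
		 * device.
		 */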
		mlxsw_reg_rauht_pack4(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE,
				      neigh_entry->rif,
				      neigh_entry->ha, dip);
		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht),
				      rauht_pl);
		if (err) {
			netdev_err(dev, "Could not delete neigh %pI4h\n", &dip);
			neigh_entry->offloaded = true;
		} else {
			neigh_entry->offloaded = false;
		}
		mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, true);
	}

	neigh_release(n);
	mlxsw_sp_port_dev_put(mlxsw_sp_port);
}

int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
				   unsigned long event, void *ptr)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp_port *mlxsw_sp_port;
	struct mlxsw_sp *mlxsw_sp;
	unsigned long interval;
	struct net_device *dev;
	struct neigh_parms *p;
	struct neighbour *n;
	u32 dip;

	switch (event) {
	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
		p = ptr;

		/* We don't care about changes in the default table. */
		if (!p->dev || p->tbl != &arp_tbl)
			return NOTIFY_DONE;

		/* We are in atomic context and can't take RTNL mutex,
		 * so use RCU variant to walk the device chain.
		 */
		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
		if (!mlxsw_sp_port)
			return NOTIFY_DONE;

		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
		mlxsw_sp->router.neighs_update.interval = interval;

		mlxsw_sp_port_dev_put(mlxsw_sp_port);
		break;
	case NETEVENT_NEIGH_UPDATE:
		n = ptr;
		dev = n->dev;

		if (n->tbl != &arp_tbl)
			return NOTIFY_DONE;

		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(dev);
		if (!mlxsw_sp_port)
			return NOTIFY_DONE;

		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
		dip = ntohl(*((__be32 *) n->primary_key));
		neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp,
							  &dip,
							  sizeof(__be32),
							  dev);
		if (WARN_ON(!neigh_entry) || WARN_ON(neigh_entry->n != n)) {
			mlxsw_sp_port_dev_put(mlxsw_sp_port);
			return NOTIFY_DONE;
		}
		neigh_entry->mlxsw_sp_port = mlxsw_sp_port;

		/* Take a reference to ensure the neighbour won't be
		 * destructed until we drop the reference in delayed
		 * work.
		 */
		neigh_clone(n);
		if (!mlxsw_core_schedule_dw(&neigh_entry->dw, 0)) {
			neigh_release(n);
			mlxsw_sp_port_dev_put(mlxsw_sp_port);
		}
		break;
	}

	return NOTIFY_DONE;
}

static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
{
	int err;

	err = rhashtable_init(&mlxsw_sp->router.neigh_ht,
			      &mlxsw_sp_neigh_ht_params);
	if (err)
		return err;

	/* Initialize the polling interval according to the default
	 * table.
	 */
	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);

	/* Create the delayed works for the neighbour activity update and
	 * the unresolved nexthop probing.
	 */
	INIT_DELAYED_WORK(&mlxsw_sp->router.neighs_update.dw,
			  mlxsw_sp_router_neighs_update_work);
	INIT_DELAYED_WORK(&mlxsw_sp->router.nexthop_probe_dw,
			  mlxsw_sp_router_probe_unresolved_nexthops);
	mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, 0);
	mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, 0);
	return 0;
}

static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
{
	cancel_delayed_work_sync(&mlxsw_sp->router.neighs_update.dw);
	cancel_delayed_work_sync(&mlxsw_sp->router.nexthop_probe_dw);
	rhashtable_destroy(&mlxsw_sp->router.neigh_ht);
}

struct mlxsw_sp_nexthop {
	struct list_head neigh_list_node; /* member of neigh entry list */
	struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
						* this belongs to
						*/
	u8 should_offload:1, /* set indicates this neigh is connected and
			      * should be put to KVD linear area of this group.
			      */
	   offloaded:1, /* set in case the neigh is actually put into
			 * KVD linear area of this group.
			 */
	   update:1; /* set indicates that MAC of this neigh should be
		      * updated in HW
		      */
	struct mlxsw_sp_neigh_entry *neigh_entry;
};

struct mlxsw_sp_nexthop_group {
	struct list_head list; /* node in mlxsw->router.nexthop_group_list */
	struct list_head fib_list; /* list of fib entries that use this group */
	u8 adj_index_valid:1;
	u32 adj_index;
	u16 ecmp_size;
	u16 count;
	struct mlxsw_sp_nexthop nexthops[0];
};

static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
					     struct mlxsw_sp_vr *vr,
					     u32 adj_index, u16 ecmp_size,
					     u32 new_adj_index,
					     u16 new_ecmp_size)
{
	char raleu_pl[MLXSW_REG_RALEU_LEN];

	mlxsw_reg_raleu_pack(raleu_pl,
			     (enum mlxsw_reg_ralxx_protocol) vr->proto, vr->id,
			     adj_index, ecmp_size, new_adj_index,
			     new_ecmp_size);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
}

static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_nexthop_group *nh_grp,
					  u32 old_adj_index, u16 old_ecmp_size)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_vr *vr = NULL;
	int err;

	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
		if (vr == fib_entry->vr)
			continue;
		vr = fib_entry->vr;
		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr,
							old_adj_index,
							old_ecmp_size,
							nh_grp->adj_index,
							nh_grp->ecmp_size);
		if (err)
			return err;
	}
	return 0;
}

static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
				       struct mlxsw_sp_nexthop *nh)
{
	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
	char ratr_pl[MLXSW_REG_RATR_LEN];

	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
			    true, adj_index, neigh_entry->rif);
	mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
}

static int
mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_nexthop_group *nh_grp)
{
	u32 adj_index = nh_grp->adj_index; /* base */
	struct mlxsw_sp_nexthop *nh;
	int i;
	int err;
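
	/* Write an adjacency entry for every nexthop that should be
	 * offloaded; entries are consecutive, starting at the group's
	 * base adjacency index.
	 */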
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];

		if (!nh->should_offload) {
			nh->offloaded = 0;
			continue;
		}

		if (nh->update) {
			err = mlxsw_sp_nexthop_mac_update(mlxsw_sp,
							  adj_index, nh);
			if (err)
				return err;
			nh->update = 0;
			nh->offloaded = 1;
		}
		adj_index++;
	}
	return 0;
}

static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_entry *fib_entry);

static int
mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	int err;

	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
		if (err)
			return err;
	}
	return 0;
}

static void
mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop *nh;
	bool offload_change = false;
	u32 adj_index;
	u16 ecmp_size = 0;
	bool old_adj_index_valid;
	u32 old_adj_index;
	u16 old_ecmp_size;
	int ret;
	int i;
	int err;

	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];

		if (nh->should_offload ^ nh->offloaded) {
			offload_change = true;
			if (nh->should_offload)
				nh->update = 1;
		}
		if (nh->should_offload)
			ecmp_size++;
	}
	if (!offload_change) {
		/* Nothing was added or removed, so no need to reallocate. Just
		 * update MAC on existing adjacency indexes.
		 */
		err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
			goto set_trap;
		}
		return;
	}
	if (!ecmp_size)
		/* No neigh of this group is connected so we just set
		 * the trap and let everything flow through kernel.
		 */
		goto set_trap;

	ret = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size);
	if (ret < 0) {
		/* We ran out of KVD linear space, just set the
		 * trap and let everything flow through kernel.
		 */
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
		goto set_trap;
	}
	adj_index = ret;
	old_adj_index_valid = nh_grp->adj_index_valid;
	old_adj_index = nh_grp->adj_index;
	old_ecmp_size = nh_grp->ecmp_size;
	nh_grp->adj_index_valid = 1;
	nh_grp->adj_index = adj_index;
	nh_grp->ecmp_size = ecmp_size;
	err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
		goto set_trap;
	}

	if (!old_adj_index_valid) {
		/* The trap was set for fib entries, so we have to call
		 * fib entry update to unset it and use adjacency index.
		 */
		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
			goto set_trap;
		}
		return;
	}

	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
					     old_adj_index, old_ecmp_size);
	mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
		goto set_trap;
	}
	return;

set_trap:
	old_adj_index_valid = nh_grp->adj_index_valid;
	nh_grp->adj_index_valid = 0;
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		nh->offloaded = 0;
	}
	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
	if (err)
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
	if (old_adj_index_valid)
		mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
}

static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
					    bool removing)
{
	if (!removing && !nh->should_offload)
		nh->should_offload = 1;
	else if (removing && nh->offloaded)
		nh->should_offload = 0;
	nh->update = 1;
}

static void
mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
			      struct mlxsw_sp_neigh_entry *neigh_entry,
			      bool removing)
{
	struct mlxsw_sp_nexthop *nh;

	/* Take RTNL mutex here to prevent lists from changes */
	rtnl_lock();
	list_for_each_entry(nh, &neigh_entry->nexthop_list,
			    neigh_list_node) {
		__mlxsw_sp_nexthop_neigh_update(nh, removing);
		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
	}
	rtnl_unlock();
}

static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_nexthop_group *nh_grp,
				 struct mlxsw_sp_nexthop *nh,
				 struct fib_nh *fib_nh)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	u32 gwip = ntohl(fib_nh->nh_gw);
	struct net_device *dev = fib_nh->nh_dev;
	struct neighbour *n;
	u8 nud_state;

	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &gwip,
						  sizeof(gwip), dev);
	if (!neigh_entry) {
		__be32 gwipn = htonl(gwip);

		n = neigh_create(&arp_tbl, &gwipn, dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
		neigh_event_send(n, NULL);
		neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &gwip,
							  sizeof(gwip), dev);
		if (!neigh_entry) {
			neigh_release(n);
			return -EINVAL;
		}
	} else {
		/* Take a reference on the neigh here, ensuring it will
		 * not be destructed before the nexthop entry is finished.
		 * The other branch takes its reference in neigh_create().
		 */
		n = neigh_entry->n;
		neigh_clone(n);
	}

	/* If that is the first nexthop connected to that neigh, add to
	 * nexthop_neighs_list
	 */
	if (list_empty(&neigh_entry->nexthop_list))
		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
			      &mlxsw_sp->router.nexthop_neighs_list);

	nh->nh_grp = nh_grp;
	nh->neigh_entry = neigh_entry;
	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
	read_lock_bh(&n->lock);
	nud_state = n->nud_state;
	read_unlock_bh(&n->lock);
	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID));

	return 0;
}

static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_nexthop *nh)
{
	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;

	list_del(&nh->neigh_list_node);

	/* If that is the last nexthop connected to that neigh, remove from
	 * nexthop_neighs_list
	 */
	if (list_empty(&nh->neigh_entry->nexthop_list))
		list_del(&nh->neigh_entry->nexthop_neighs_list_node);

	neigh_release(neigh_entry->n);
}

static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
{
	struct mlxsw_sp_nexthop_group *nh_grp;
	struct mlxsw_sp_nexthop *nh;
	struct fib_nh *fib_nh;
	size_t alloc_size;
	int i;
	int err;

	alloc_size = sizeof(*nh_grp) +
		     fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
	if (!nh_grp)
		return ERR_PTR(-ENOMEM);
	INIT_LIST_HEAD(&nh_grp->fib_list);
	nh_grp->count = fi->fib_nhs;
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		fib_nh = &fi->fib_nh[i];
		err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh);
		if (err)
			goto err_nexthop_init;
	}
	list_add_tail(&nh_grp->list, &mlxsw_sp->router.nexthop_group_list);
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	return nh_grp;

err_nexthop_init:
	for (i--; i >= 0; i--) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
	}
	kfree(nh_grp);
	return ERR_PTR(err);
}

static void
mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop *nh;
	int i;

	list_del(&nh_grp->list);
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
	}
	kfree(nh_grp);
}

static bool mlxsw_sp_nexthop_match(struct mlxsw_sp_nexthop *nh,
				   struct fib_info *fi)
{
	int i;

	for (i = 0; i < fi->fib_nhs; i++) {
		struct fib_nh *fib_nh = &fi->fib_nh[i];
		u32 gwip = ntohl(fib_nh->nh_gw);

		if (memcmp(nh->neigh_entry->key.addr,
			   &gwip, sizeof(u32)) == 0 &&
		    nh->neigh_entry->key.dev == fib_nh->nh_dev)
			return true;
	}
	return false;
}

static bool mlxsw_sp_nexthop_group_match(struct mlxsw_sp_nexthop_group *nh_grp,
					 struct fib_info *fi)
{
	int i;

	if (nh_grp->count != fi->fib_nhs)
		return false;
	for (i = 0; i < nh_grp->count; i++) {
		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];

		if (!mlxsw_sp_nexthop_match(nh, fi))
			return false;
	}
	return true;
}
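
/* Nexthop groups are shared: a fib_info whose gateway/device pairs all
 * match an existing group reuses that group's adjacency entries instead
 * of allocating new ones.
 */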
static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop_group_find(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
{
	struct mlxsw_sp_nexthop_group *nh_grp;

	list_for_each_entry(nh_grp, &mlxsw_sp->router.nexthop_group_list,
			    list) {
		if (mlxsw_sp_nexthop_group_match(nh_grp, fi))
			return nh_grp;
	}
	return NULL;
}

static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_fib_entry *fib_entry,
				      struct fib_info *fi)
{
	struct mlxsw_sp_nexthop_group *nh_grp;

	nh_grp = mlxsw_sp_nexthop_group_find(mlxsw_sp, fi);
	if (!nh_grp) {
		nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi);
		if (IS_ERR(nh_grp))
			return PTR_ERR(nh_grp);
	}
	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
	fib_entry->nh_group = nh_grp;
	return 0;
}

static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;

	list_del(&fib_entry->nexthop_group_node);
	if (!list_empty(&nh_grp->fib_list))
		return;
	mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp);
}

static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib_entry *fib_entry,
					 enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	u32 *p_dip = (u32 *) fib_entry->key.addr;
	struct mlxsw_sp_vr *vr = fib_entry->vr;
	enum mlxsw_reg_ralue_trap_action trap_action;
	u16 trap_id = 0;
	u32 adjacency_index = 0;
	u16 ecmp_size = 0;

	/* In case the nexthop group adjacency index is valid, use it
	 * with the provided ECMP size. Otherwise, set up a trap and pass
	 * traffic to the kernel.
	 */
	if (fib_entry->nh_group->adj_index_valid) {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
		adjacency_index = fib_entry->nh_group->adj_index;
		ecmp_size = fib_entry->nh_group->ecmp_size;
	} else {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
	}

	mlxsw_reg_ralue_pack4(ralue_pl,
			      (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
			      vr->id, fib_entry->key.prefix_len, *p_dip);
	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
					adjacency_index, ecmp_size);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}

static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry,
					enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	u32 *p_dip = (u32 *) fib_entry->key.addr;
	struct mlxsw_sp_vr *vr = fib_entry->vr;

	mlxsw_reg_ralue_pack4(ralue_pl,
			      (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
			      vr->id, fib_entry->key.prefix_len, *p_dip);
	mlxsw_reg_ralue_act_local_pack(ralue_pl,
				       MLXSW_REG_RALUE_TRAP_ACTION_NOP, 0,
				       fib_entry->rif);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}

static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry *fib_entry,
				       enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	u32 *p_dip = (u32 *) fib_entry->key.addr;
	struct mlxsw_sp_vr *vr = fib_entry->vr;

	mlxsw_reg_ralue_pack4(ralue_pl,
			      (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
			      vr->id, fib_entry->key.prefix_len, *p_dip);
	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}

static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_entry *fib_entry,
				  enum mlxsw_reg_ralue_op op)
{
	switch (fib_entry->type) {
	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
		return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
		return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
		return mlxsw_sp_fib_entry_op4_trap(mlxsw_sp, fib_entry, op);
	}
	return -EINVAL;
}

static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib_entry *fib_entry,
				 enum mlxsw_reg_ralue_op op)
{
	switch (fib_entry->vr->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		return mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op);
	case MLXSW_SP_L3_PROTO_IPV6:
		return -EINVAL;
	}
	return -EINVAL;
}

static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_entry *fib_entry)
{
	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
}

static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_entry *fib_entry)
{
	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
}

static int
mlxsw_sp_router_fib4_entry_init(struct mlxsw_sp *mlxsw_sp,
				const struct fib_entry_notifier_info *fen_info,
				struct mlxsw_sp_fib_entry *fib_entry)
{
	struct fib_info *fi = fen_info->fi;
	struct mlxsw_sp_rif *r = NULL;
	int nhsel;
	int err;

	if (fen_info->type == RTN_LOCAL ||
	    fen_info->type == RTN_BROADCAST) {
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
		return 0;
	}
	if (fen_info->type != RTN_UNICAST)
		return -EINVAL;

	for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
		const struct fib_nh *nh = &fi->fib_nh[nhsel];

		if (!nh->nh_dev)
			continue;
		r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, nh->nh_dev);
		if (!r) {
			/* In case router interface is not found for
			 * at least one of the nexthops, that means
			 * the nexthop points to some device unrelated
			 * to us. Set trap and pass the packets for
			 * this prefix to kernel.
			 */
			break;
		}
	}

	if (!r) {
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
		return 0;
	}

	if (fi->fib_scope != RT_SCOPE_UNIVERSE) {
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
		fib_entry->rif = r->rif;
	} else {
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
		err = mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fi);
		if (err)
			return err;
	}
	fib_info_offload_inc(fen_info->fi);
	return 0;
}

static void
mlxsw_sp_router_fib4_entry_fini(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib_entry *fib_entry)
{
	if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
		fib_info_offload_dec(fib_entry->fi);
	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_REMOTE)
		mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry);
}

static struct mlxsw_sp_fib_entry *
mlxsw_sp_fib_entry_get(struct mlxsw_sp *mlxsw_sp,
		       const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	struct fib_info *fi = fen_info->fi;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->dst_len, fen_info->tb_id,
			     MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(vr))
		return ERR_CAST(vr);

	fib_entry = mlxsw_sp_fib_entry_lookup(vr->fib, &fen_info->dst,
					      sizeof(fen_info->dst),
					      fen_info->dst_len, fi->fib_dev);
	if (fib_entry) {
		/* Already exists, just take a reference */
		fib_entry->ref_count++;
		return fib_entry;
	}
	fib_entry = mlxsw_sp_fib_entry_create(vr->fib, &fen_info->dst,
					      sizeof(fen_info->dst),
					      fen_info->dst_len, fi->fib_dev);
	if (!fib_entry) {
		err = -ENOMEM;
		goto err_fib_entry_create;
	}
	fib_entry->vr = vr;
	fib_entry->fi = fi;
	fib_entry->ref_count = 1;

	err = mlxsw_sp_router_fib4_entry_init(mlxsw_sp, fen_info, fib_entry);
	if (err)
		goto err_fib4_entry_init;

	return fib_entry;

err_fib4_entry_init:
	mlxsw_sp_fib_entry_destroy(fib_entry);
err_fib_entry_create:
	mlxsw_sp_vr_put(mlxsw_sp, vr);

	return ERR_PTR(err);
}

static struct mlxsw_sp_fib_entry *
mlxsw_sp_fib_entry_find(struct mlxsw_sp *mlxsw_sp,
			const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_vr *vr;

	vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id,
			      MLXSW_SP_L3_PROTO_IPV4);
	if (!vr)
		return NULL;

	return mlxsw_sp_fib_entry_lookup(vr->fib, &fen_info->dst,
					 sizeof(fen_info->dst),
					 fen_info->dst_len,
					 fen_info->fi->fib_dev);
}

static void mlxsw_sp_fib_entry_put(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_vr *vr = fib_entry->vr;

	if (--fib_entry->ref_count == 0) {
		mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry);
		mlxsw_sp_fib_entry_destroy(fib_entry);
	}
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}

static void mlxsw_sp_fib_entry_put_all(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry *fib_entry)
{
	unsigned int last_ref_count;

	do {
		last_ref_count = fib_entry->ref_count;
		mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry);
	} while (last_ref_count != 1);
}

static int mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
				    struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_vr *vr;
	int err;

	if (mlxsw_sp->router.aborted)
		return 0;

	fib_entry = mlxsw_sp_fib_entry_get(mlxsw_sp, fen_info);
	if (IS_ERR(fib_entry)) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB4 entry being added.\n");
		return PTR_ERR(fib_entry);
	}

	if (fib_entry->ref_count != 1)
		return 0;

	vr = fib_entry->vr;
	err = mlxsw_sp_fib_entry_insert(vr->fib, fib_entry);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to insert FIB4 entry being added.\n");
		goto err_fib_entry_insert;
	}
	err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
	if (err)
		goto err_fib_entry_add;
	return 0;

err_fib_entry_add:
	mlxsw_sp_fib_entry_remove(vr->fib, fib_entry);
err_fib_entry_insert:
	mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry);
	return err;
}

static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
				     struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib_entry *fib_entry;

	if (mlxsw_sp->router.aborted)
		return;

	fib_entry = mlxsw_sp_fib_entry_find(mlxsw_sp, fen_info);
	if (!fib_entry)
		return;

	if (fib_entry->ref_count == 1) {
		mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
		mlxsw_sp_fib_entry_remove(fib_entry->vr->fib, fib_entry);
	}

	mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry);
}

static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];
	char ralst_pl[MLXSW_REG_RALST_LEN];
	char raltb_pl[MLXSW_REG_RALTB_LEN];
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	int err;

	mlxsw_reg_ralta_pack(ralta_pl, true, MLXSW_REG_RALXX_PROTOCOL_IPV4,
			     MLXSW_SP_LPM_TREE_MIN);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
	if (err)
		return err;

	mlxsw_reg_ralst_pack(ralst_pl, 0xff, MLXSW_SP_LPM_TREE_MIN);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
	if (err)
		return err;

	mlxsw_reg_raltb_pack(raltb_pl, 0, MLXSW_REG_RALXX_PROTOCOL_IPV4,
			     MLXSW_SP_LPM_TREE_MIN);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
	if (err)
		return err;

	mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4,
			      MLXSW_REG_RALUE_OP_WRITE_WRITE, 0, 0, 0);
	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}

static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_resources *resources;
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_fib_entry *tmp;
	struct mlxsw_sp_vr *vr;
	int i;
	int err;

	resources = mlxsw_core_resources_get(mlxsw_sp->core);
	for (i = 0; i < resources->max_virtual_routers; i++) {
		vr = &mlxsw_sp->router.vrs[i];
		if (!vr->used)
			continue;

		list_for_each_entry_safe(fib_entry, tmp,
					 &vr->fib->entry_list, list) {
			bool do_break = &tmp->list == &vr->fib->entry_list;

			mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
			mlxsw_sp_fib_entry_remove(fib_entry->vr->fib,
						  fib_entry);
			mlxsw_sp_fib_entry_put_all(mlxsw_sp, fib_entry);
			if (do_break)
				break;
		}
	}
	mlxsw_sp->router.aborted = true;
	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
	if (err)
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
}

static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_resources *resources;
	char rgcr_pl[MLXSW_REG_RGCR_LEN];
	int err;

	resources = mlxsw_core_resources_get(mlxsw_sp->core);
	if (!resources->max_rif_valid)
		return -EIO;

	mlxsw_sp->rifs = kcalloc(resources->max_rif,
				 sizeof(struct mlxsw_sp_rif *), GFP_KERNEL);
	if (!mlxsw_sp->rifs)
		return -ENOMEM;

	mlxsw_reg_rgcr_pack(rgcr_pl, true);
	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, resources->max_rif);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
	if (err)
		goto err_rgcr_fail;

	return 0;

err_rgcr_fail:
	kfree(mlxsw_sp->rifs);
	return err;
}

static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_resources *resources;
	char rgcr_pl[MLXSW_REG_RGCR_LEN];
	int i;

	mlxsw_reg_rgcr_pack(rgcr_pl, false);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);

	resources = mlxsw_core_resources_get(mlxsw_sp->core);
	for (i = 0; i < resources->max_rif; i++)
		WARN_ON_ONCE(mlxsw_sp->rifs[i]);

	kfree(mlxsw_sp->rifs);
}

static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
				     unsigned long event, void *ptr)
{
	struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
	struct fib_entry_notifier_info *fen_info = ptr;
	int err;

	switch (event) {
	case FIB_EVENT_ENTRY_ADD:
		err = mlxsw_sp_router_fib4_add(mlxsw_sp, fen_info);
		if (err)
			mlxsw_sp_router_fib4_abort(mlxsw_sp);
		break;
	case FIB_EVENT_ENTRY_DEL:
		mlxsw_sp_router_fib4_del(mlxsw_sp, fen_info);
		break;
	case FIB_EVENT_RULE_ADD: /* fall through */
	case FIB_EVENT_RULE_DEL:
		mlxsw_sp_router_fib4_abort(mlxsw_sp);
		break;
	}
	return NOTIFY_DONE;
}

int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
	int err;

	INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list);
	INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_group_list);
	err = __mlxsw_sp_router_init(mlxsw_sp);
	if (err)
		return err;

	mlxsw_sp_lpm_init(mlxsw_sp);
	err = mlxsw_sp_vrs_init(mlxsw_sp);
	if (err)
		goto err_vrs_init;

	err = mlxsw_sp_neigh_init(mlxsw_sp);
	if (err)
		goto err_neigh_init;

	mlxsw_sp->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
	register_fib_notifier(&mlxsw_sp->fib_nb);
	return 0;

err_neigh_init:
	mlxsw_sp_vrs_fini(mlxsw_sp);
err_vrs_init:
	__mlxsw_sp_router_fini(mlxsw_sp);
	return err;
}

void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	unregister_fib_notifier(&mlxsw_sp->fib_nb);
	mlxsw_sp_neigh_fini(mlxsw_sp);
	mlxsw_sp_vrs_fini(mlxsw_sp);
	__mlxsw_sp_router_fini(mlxsw_sp);
}