/*
 * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
 * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
 * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
 * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/rhashtable.h>
#include <linux/bitops.h>
#include <linux/in6.h>
#include <linux/notifier.h>
#include <net/netevent.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/ip_fib.h>

#include "spectrum.h"
#include "core.h"
#include "reg.h"

#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)

static bool
mlxsw_sp_prefix_usage_subset(struct mlxsw_sp_prefix_usage *prefix_usage1,
			     struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	unsigned char prefix;

	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage1) {
		if (!test_bit(prefix, prefix_usage2->b))
			return false;
	}
	return true;
}

static bool
mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
			 struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}

static bool
mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage)
{
	struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 }};

	return mlxsw_sp_prefix_usage_eq(prefix_usage, &prefix_usage_none);
}

static void
mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
			  struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}

static void
mlxsw_sp_prefix_usage_zero(struct mlxsw_sp_prefix_usage *prefix_usage)
{
	memset(prefix_usage, 0, sizeof(*prefix_usage));
}

static void
mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
			  unsigned char prefix_len)
{
	set_bit(prefix_len, prefix_usage->b);
}

static void
mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
			    unsigned char prefix_len)
{
	clear_bit(prefix_len, prefix_usage->b);
}

struct mlxsw_sp_fib_key {
	struct net_device *dev;
	unsigned char addr[sizeof(struct in6_addr)];
	unsigned char prefix_len;
};

enum mlxsw_sp_fib_entry_type {
	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
};

struct mlxsw_sp_nexthop_group;

struct mlxsw_sp_fib_entry {
	struct rhash_head ht_node;
	struct list_head list;
	struct mlxsw_sp_fib_key key;
	enum mlxsw_sp_fib_entry_type type;
	unsigned int ref_count;
	u16 rif; /* used for action local */
	struct mlxsw_sp_vr *vr;
	struct fib_info *fi;
	struct list_head nexthop_group_node;
	struct mlxsw_sp_nexthop_group *nh_group;
};

struct mlxsw_sp_fib {
	struct rhashtable ht;
	struct list_head entry_list;
	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
	struct mlxsw_sp_prefix_usage prefix_usage;
};

static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_fib_entry, key),
	.head_offset = offsetof(struct mlxsw_sp_fib_entry, ht_node),
	.key_len = sizeof(struct mlxsw_sp_fib_key),
	.automatic_shrinking = true,
};

static int mlxsw_sp_fib_entry_insert(struct mlxsw_sp_fib *fib,
				     struct mlxsw_sp_fib_entry *fib_entry)
{
	unsigned char prefix_len = fib_entry->key.prefix_len;
	int err;

	err = rhashtable_insert_fast(&fib->ht, &fib_entry->ht_node,
				     mlxsw_sp_fib_ht_params);
	if (err)
		return err;
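	/* Keep a reference count per prefix length so that fib->prefix_usage
	 * only contains prefix lengths with at least one installed entry;
	 * the shape of the VR's LPM tree is derived from this bitmap.
	 */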
	list_add_tail(&fib_entry->list, &fib->entry_list);
	if (fib->prefix_ref_count[prefix_len]++ == 0)
		mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len);
	return 0;
}

static void mlxsw_sp_fib_entry_remove(struct mlxsw_sp_fib *fib,
				      struct mlxsw_sp_fib_entry *fib_entry)
{
	unsigned char prefix_len = fib_entry->key.prefix_len;

	if (--fib->prefix_ref_count[prefix_len] == 0)
		mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len);
	list_del(&fib_entry->list);
	rhashtable_remove_fast(&fib->ht, &fib_entry->ht_node,
			       mlxsw_sp_fib_ht_params);
}

static struct mlxsw_sp_fib_entry *
mlxsw_sp_fib_entry_create(struct mlxsw_sp_fib *fib, const void *addr,
			  size_t addr_len, unsigned char prefix_len,
			  struct net_device *dev)
{
	struct mlxsw_sp_fib_entry *fib_entry;

	fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL);
	if (!fib_entry)
		return NULL;
	fib_entry->key.dev = dev;
	memcpy(fib_entry->key.addr, addr, addr_len);
	fib_entry->key.prefix_len = prefix_len;
	return fib_entry;
}

static void mlxsw_sp_fib_entry_destroy(struct mlxsw_sp_fib_entry *fib_entry)
{
	kfree(fib_entry);
}

static struct mlxsw_sp_fib_entry *
mlxsw_sp_fib_entry_lookup(struct mlxsw_sp_fib *fib, const void *addr,
			  size_t addr_len, unsigned char prefix_len,
			  struct net_device *dev)
{
	struct mlxsw_sp_fib_key key;

	memset(&key, 0, sizeof(key));
	key.dev = dev;
	memcpy(key.addr, addr, addr_len);
	key.prefix_len = prefix_len;
	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
}

static struct mlxsw_sp_fib *mlxsw_sp_fib_create(void)
{
	struct mlxsw_sp_fib *fib;
	int err;

	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
	if (!fib)
		return ERR_PTR(-ENOMEM);
	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
	if (err)
		goto err_rhashtable_init;
	INIT_LIST_HEAD(&fib->entry_list);
	return fib;

err_rhashtable_init:
	kfree(fib);
	return ERR_PTR(err);
}

static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib)
{
	rhashtable_destroy(&fib->ht);
	kfree(fib);
}

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp, bool one_reserved)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int i;

	for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
		lpm_tree = &mlxsw_sp->router.lpm_trees[i];
		if (lpm_tree->ref_count == 0) {
			if (one_reserved)
				one_reserved = false;
			else
				return lpm_tree;
		}
	}
	return NULL;
}

static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];

	mlxsw_reg_ralta_pack(ralta_pl, true,
			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
			     lpm_tree->id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}

static int mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];

	mlxsw_reg_ralta_pack(ralta_pl, false,
			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
			     lpm_tree->id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}
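
/* Program the tree structure via RALST: the longest used prefix length
 * serves as the root bin and each used prefix length points to the next
 * shorter one as its left child, with no right children.
 */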
static int
mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_prefix_usage *prefix_usage,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralst_pl[MLXSW_REG_RALST_LEN];
	u8 root_bin = 0;
	u8 prefix;
	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;

	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
		root_bin = prefix;

	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
		if (prefix == 0)
			continue;
		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
					 MLXSW_REG_RALST_BIN_NO_CHILD);
		last_prefix = prefix;
	}
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
}

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
			 struct mlxsw_sp_prefix_usage *prefix_usage,
			 enum mlxsw_sp_l3proto proto, bool one_reserved)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int err;

	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp, one_reserved);
	if (!lpm_tree)
		return ERR_PTR(-EBUSY);
	lpm_tree->proto = proto;
	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
	if (err)
		return ERR_PTR(err);

	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
						lpm_tree);
	if (err)
		goto err_left_struct_set;
	memcpy(&lpm_tree->prefix_usage, prefix_usage,
	       sizeof(lpm_tree->prefix_usage));
	return lpm_tree;

err_left_struct_set:
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
	return ERR_PTR(err);
}

static int mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_lpm_tree *lpm_tree)
{
	return mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
		      struct mlxsw_sp_prefix_usage *prefix_usage,
		      enum mlxsw_sp_l3proto proto, bool one_reserved)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int i;

	for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
		lpm_tree = &mlxsw_sp->router.lpm_trees[i];
		if (lpm_tree->ref_count != 0 &&
		    lpm_tree->proto == proto &&
		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
					     prefix_usage))
			goto inc_ref_count;
	}
	lpm_tree = mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage,
					    proto, one_reserved);
	if (IS_ERR(lpm_tree))
		return lpm_tree;

inc_ref_count:
	lpm_tree->ref_count++;
	return lpm_tree;
}

static int mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_lpm_tree *lpm_tree)
{
	if (--lpm_tree->ref_count == 0)
		return mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
	return 0;
}

static void mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int i;

	for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
		lpm_tree = &mlxsw_sp->router.lpm_trees[i];
		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
	}
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_vr *vr;
	int i;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router.vrs[i];
		if (!vr->used)
			return vr;
	}
	return NULL;
}

static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_vr *vr)
{
	char raltb_pl[MLXSW_REG_RALTB_LEN];

	mlxsw_reg_raltb_pack(raltb_pl, vr->id,
			     (enum mlxsw_reg_ralxx_protocol) vr->proto,
			     vr->lpm_tree->id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}

static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_vr *vr)
{
	char raltb_pl[MLXSW_REG_RALTB_LEN];

	/* Bind to tree 0 which is the default */
	mlxsw_reg_raltb_pack(raltb_pl, vr->id,
			     (enum mlxsw_reg_ralxx_protocol) vr->proto, 0);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}

static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
{
	/* For our purpose, squash main and local table into one */
	if (tb_id == RT_TABLE_LOCAL)
		tb_id = RT_TABLE_MAIN;
	return tb_id;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
					    u32 tb_id,
					    enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_vr *vr;
	int i;

	tb_id = mlxsw_sp_fix_tb_id(tb_id);

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router.vrs[i];
		if (vr->used && vr->proto == proto && vr->tb_id == tb_id)
			return vr;
	}
	return NULL;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
					      unsigned char prefix_len,
					      u32 tb_id,
					      enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_prefix_usage req_prefix_usage;
	struct mlxsw_sp_lpm_tree *lpm_tree;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
	if (!vr)
		return ERR_PTR(-EBUSY);
	vr->fib = mlxsw_sp_fib_create();
	if (IS_ERR(vr->fib))
		return ERR_CAST(vr->fib);

	vr->proto = proto;
	vr->tb_id = tb_id;
	mlxsw_sp_prefix_usage_zero(&req_prefix_usage);
	mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 proto, true);
	if (IS_ERR(lpm_tree)) {
		err = PTR_ERR(lpm_tree);
		goto err_tree_get;
	}
	vr->lpm_tree = lpm_tree;
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
	if (err)
		goto err_tree_bind;

	vr->used = true;
	return vr;

err_tree_bind:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
err_tree_get:
	mlxsw_sp_fib_destroy(vr->fib);

	return ERR_PTR(err);
}

static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_vr *vr)
{
	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr);
	mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
	mlxsw_sp_fib_destroy(vr->fib);
	vr->used = false;
}
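
/* Make sure the VR is bound to an LPM tree that can serve the requested
 * prefix usage, replacing the current tree if a suitable one can be
 * allocated. Failure to allocate is tolerated as long as the current
 * tree is a superset of the requirement.
 */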
static int
mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
			   struct mlxsw_sp_prefix_usage *req_prefix_usage)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;

	if (mlxsw_sp_prefix_usage_eq(req_prefix_usage,
				     &vr->lpm_tree->prefix_usage))
		return 0;

	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage,
					 vr->proto, false);
	if (IS_ERR(lpm_tree)) {
		/* We failed to get a tree according to the required
		 * prefix usage. However, the current tree might still be
		 * good for us if our requirement is a subset of the
		 * prefixes used in the tree.
		 */
		if (mlxsw_sp_prefix_usage_subset(req_prefix_usage,
						 &vr->lpm_tree->prefix_usage))
			return 0;
		return PTR_ERR(lpm_tree);
	}

	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr);
	mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
	vr->lpm_tree = lpm_tree;
	return mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp,
					   unsigned char prefix_len,
					   u32 tb_id,
					   enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_vr *vr;
	int err;

	tb_id = mlxsw_sp_fix_tb_id(tb_id);
	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id, proto);
	if (!vr) {
		vr = mlxsw_sp_vr_create(mlxsw_sp, prefix_len, tb_id, proto);
		if (IS_ERR(vr))
			return vr;
	} else {
		struct mlxsw_sp_prefix_usage req_prefix_usage;

		mlxsw_sp_prefix_usage_cpy(&req_prefix_usage,
					  &vr->fib->prefix_usage);
		mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
		/* Need to replace LPM tree in case new prefix is required. */
		err = mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
						 &req_prefix_usage);
		if (err)
			return ERR_PTR(err);
	}
	return vr;
}

static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
{
	/* Destroy the virtual router entity in case the associated FIB is
	 * empty and allow it to be used for other tables in the future.
	 * Otherwise, check whether some prefix usage disappeared and change
	 * the tree if that is the case. Note that in case a new, smaller
	 * tree cannot be allocated, the original one will be kept in use.
	 */
	if (mlxsw_sp_prefix_usage_none(&vr->fib->prefix_usage))
		mlxsw_sp_vr_destroy(mlxsw_sp, vr);
	else
		mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
					   &vr->fib->prefix_usage);
}

static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_vr *vr;
	u64 max_vrs;
	int i;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
		return -EIO;

	max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
	mlxsw_sp->router.vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
				       GFP_KERNEL);
	if (!mlxsw_sp->router.vrs)
		return -ENOMEM;

	for (i = 0; i < max_vrs; i++) {
		vr = &mlxsw_sp->router.vrs[i];
		vr->id = i;
	}

	return 0;
}

static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);

static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
{
	/* At this stage we're guaranteed not to have new incoming
	 * FIB notifications and the work queue is free from FIBs
	 * sitting on top of mlxsw netdevs. However, we can still
	 * have other FIBs queued. Flush the queue before flushing
	 * the device's tables. No need for locks, as we're the only
	 * writer.
	 */
	mlxsw_core_flush_owq();
	mlxsw_sp_router_fib_flush(mlxsw_sp);
	kfree(mlxsw_sp->router.vrs);
}

struct mlxsw_sp_neigh_key {
	struct neighbour *n;
};

struct mlxsw_sp_neigh_entry {
	struct rhash_head ht_node;
	struct mlxsw_sp_neigh_key key;
	u16 rif;
	bool offloaded;
	struct delayed_work dw;
	struct mlxsw_sp_port *mlxsw_sp_port;
	unsigned char ha[ETH_ALEN];
	struct list_head nexthop_list; /* list of nexthops using
					* this neigh entry
					*/
	struct list_head nexthop_neighs_list_node;
};

static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
	.key_len = sizeof(struct mlxsw_sp_neigh_key),
};

static int
mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return rhashtable_insert_fast(&mlxsw_sp->router.neigh_ht,
				      &neigh_entry->ht_node,
				      mlxsw_sp_neigh_ht_params);
}

static void
mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	rhashtable_remove_fast(&mlxsw_sp->router.neigh_ht,
			       &neigh_entry->ht_node,
			       mlxsw_sp_neigh_ht_params);
}

static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work);

static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_create(struct neighbour *n, u16 rif)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;

	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_ATOMIC);
	if (!neigh_entry)
		return NULL;
	neigh_entry->key.n = n;
	neigh_entry->rif = rif;
	INIT_DELAYED_WORK(&neigh_entry->dw, mlxsw_sp_router_neigh_update_hw);
	INIT_LIST_HEAD(&neigh_entry->nexthop_list);
	return neigh_entry;
}

static void
mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	kfree(neigh_entry);
}

static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
{
	struct mlxsw_sp_neigh_key key;

	key.n = n;
	return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht,
				      &key, mlxsw_sp_neigh_ht_params);
}
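
/* Track a kernel neighbour in our hash table so that it can later be
 * offloaded. Only ARP (IPv4) neighbours are considered; the RIF is
 * resolved from the neighbour's netdev.
 */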
int mlxsw_sp_router_neigh_construct(struct net_device *dev,
				    struct neighbour *n)
{
	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp_rif *r;
	int err;

	if (n->tbl != &arp_tbl)
		return 0;

	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
	if (neigh_entry)
		return 0;

	r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
	if (WARN_ON(!r))
		return -EINVAL;

	neigh_entry = mlxsw_sp_neigh_entry_create(n, r->rif);
	if (!neigh_entry)
		return -ENOMEM;
	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
	if (err)
		goto err_neigh_entry_insert;
	return 0;

err_neigh_entry_insert:
	mlxsw_sp_neigh_entry_destroy(neigh_entry);
	return err;
}

void mlxsw_sp_router_neigh_destroy(struct net_device *dev,
				   struct neighbour *n)
{
	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
	struct mlxsw_sp_neigh_entry *neigh_entry;

	if (n->tbl != &arp_tbl)
		return;

	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
	if (!neigh_entry)
		return;
	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_destroy(neigh_entry);
}

static void
mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
{
	unsigned long interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);

	mlxsw_sp->router.neighs_update.interval = jiffies_to_msecs(interval);
}

static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int ent_index)
{
	struct net_device *dev;
	struct neighbour *n;
	__be32 dipn;
	u32 dip;
	u16 rif;

	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);

	if (!mlxsw_sp->rifs[rif]) {
		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
		return;
	}

	dipn = htonl(dip);
	dev = mlxsw_sp->rifs[rif]->dev;
	n = neigh_lookup(&arp_tbl, &dipn, dev);
	if (!n) {
		netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n",
			   &dip);
		return;
	}

	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
	neigh_event_send(n, NULL);
	neigh_release(n);
}

static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
	u8 num_entries;
	int i;

	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
								rec_index);
	/* Hardware starts counting at 0, so add 1. */
	num_entries++;

	/* Each record consists of several neighbour entries. */
	for (i = 0; i < num_entries; i++) {
		int ent_index;

		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
						       ent_index);
	}
}

static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
					      char *rauhtd_pl, int rec_index)
{
	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
	case MLXSW_REG_RAUHTD_TYPE_IPV4:
		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
						       rec_index);
		break;
	case MLXSW_REG_RAUHTD_TYPE_IPV6:
		WARN_ON_ONCE(1);
		break;
	}
}
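
/* A RAUHTD response holds at most MLXSW_REG_RAUHTD_REC_MAX_NUM records.
 * If the response is packed to capacity - including the last record
 * itself being full - more activity records may be pending and the dump
 * is retried.
 */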
static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
{
	u8 num_rec, last_rec_index, num_entries;

	num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
	last_rec_index = num_rec - 1;

	if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
		return false;
	if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
	    MLXSW_REG_RAUHTD_TYPE_IPV6)
		return true;

	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
								last_rec_index);
	if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
		return true;
	return false;
}

static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
{
	char *rauhtd_pl;
	u8 num_rec;
	int i, err;

	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
	if (!rauhtd_pl)
		return -ENOMEM;

	/* Make sure the neighbour's netdev isn't removed in the
	 * process.
	 */
	rtnl_lock();
	do {
		mlxsw_reg_rauhtd_pack(rauhtd_pl, MLXSW_REG_RAUHTD_TYPE_IPV4);
		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
				      rauhtd_pl);
		if (err) {
			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
			break;
		}
		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
		for (i = 0; i < num_rec; i++)
			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
							  i);
	} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
	rtnl_unlock();

	kfree(rauhtd_pl);
	return err;
}

static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;

	/* Take RTNL mutex here to prevent the lists from changing */
	rtnl_lock();
	list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
			    nexthop_neighs_list_node) {
		/* If this neigh has nexthops, make the kernel think it is
		 * active regardless of the traffic.
		 */
		if (!list_empty(&neigh_entry->nexthop_list))
			neigh_event_send(neigh_entry->key.n, NULL);
	}
	rtnl_unlock();
}

static void
mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
{
	unsigned long interval = mlxsw_sp->router.neighs_update.interval;

	mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw,
			       msecs_to_jiffies(interval));
}

static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
{
	struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
						 router.neighs_update.dw.work);
	int err;

	err = mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp);
	if (err)
		dev_err(mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");

	mlxsw_sp_router_neighs_update_nh(mlxsw_sp);

	mlxsw_sp_router_neighs_update_work_schedule(mlxsw_sp);
}

static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
						 router.nexthop_probe_dw.work);

	/* Iterate over nexthop neighbours, find those who are unresolved and
	 * send ARP on them. This solves the chicken-and-egg problem: a
	 * nexthop is not offloaded until its neighbour is resolved, but the
	 * neighbour may never be resolved if traffic already flows through
	 * the hardware using a different nexthop.
	 *
	 * Take RTNL mutex here to prevent the lists from changing.
	 */
	rtnl_lock();
	list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
			    nexthop_neighs_list_node) {
		if (!(neigh_entry->key.n->nud_state & NUD_VALID) &&
		    !list_empty(&neigh_entry->nexthop_list))
			neigh_event_send(neigh_entry->key.n, NULL);
	}
	rtnl_unlock();

	mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw,
			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
}

static void
mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
			      struct mlxsw_sp_neigh_entry *neigh_entry,
			      bool removing);

static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work)
{
	struct mlxsw_sp_neigh_entry *neigh_entry =
		container_of(work, struct mlxsw_sp_neigh_entry, dw.work);
	struct neighbour *n = neigh_entry->key.n;
	struct mlxsw_sp_port *mlxsw_sp_port = neigh_entry->mlxsw_sp_port;
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
	char rauht_pl[MLXSW_REG_RAUHT_LEN];
	struct net_device *dev;
	bool entry_connected;
	u8 nud_state, dead;
	bool updating;
	bool removing;
	bool adding;
	u32 dip;
	int err;

	read_lock_bh(&n->lock);
	dip = ntohl(*((__be32 *) n->primary_key));
	memcpy(neigh_entry->ha, n->ha, sizeof(neigh_entry->ha));
	nud_state = n->nud_state;
	dead = n->dead;
	dev = n->dev;
	read_unlock_bh(&n->lock);

	entry_connected = nud_state & NUD_VALID && !dead;
	adding = (!neigh_entry->offloaded) && entry_connected;
	updating = neigh_entry->offloaded && entry_connected;
	removing = neigh_entry->offloaded && !entry_connected;

	if (adding || updating) {
		mlxsw_reg_rauht_pack4(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_ADD,
				      neigh_entry->rif,
				      neigh_entry->ha, dip);
		err = mlxsw_reg_write(mlxsw_sp->core,
				      MLXSW_REG(rauht), rauht_pl);
		if (err) {
			netdev_err(dev, "Could not add neigh %pI4h\n", &dip);
			neigh_entry->offloaded = false;
		} else {
			neigh_entry->offloaded = true;
		}
		mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, false);
	} else if (removing) {
		mlxsw_reg_rauht_pack4(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE,
				      neigh_entry->rif,
				      neigh_entry->ha, dip);
		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht),
				      rauht_pl);
		if (err) {
			netdev_err(dev, "Could not delete neigh %pI4h\n", &dip);
			neigh_entry->offloaded = true;
		} else {
			neigh_entry->offloaded = false;
		}
		mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, true);
	}

	neigh_release(n);
	mlxsw_sp_port_dev_put(mlxsw_sp_port);
}
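
/* Netevent handler. Runs in atomic context: DELAY_PROBE_TIME updates
 * only adjust our neighbour polling interval, while neighbour updates
 * are synced to the device from the delayed work scheduled here.
 */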
int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
				   unsigned long event, void *ptr)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp_port *mlxsw_sp_port;
	struct mlxsw_sp *mlxsw_sp;
	unsigned long interval;
	struct net_device *dev;
	struct neigh_parms *p;
	struct neighbour *n;
	u32 dip;

	switch (event) {
	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
		p = ptr;

		/* We don't care about changes in the default table. */
		if (!p->dev || p->tbl != &arp_tbl)
			return NOTIFY_DONE;

		/* We are in atomic context and can't take RTNL mutex,
		 * so use RCU variant to walk the device chain.
		 */
		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
		if (!mlxsw_sp_port)
			return NOTIFY_DONE;

		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
		mlxsw_sp->router.neighs_update.interval = interval;

		mlxsw_sp_port_dev_put(mlxsw_sp_port);
		break;
	case NETEVENT_NEIGH_UPDATE:
		n = ptr;
		dev = n->dev;

		if (n->tbl != &arp_tbl)
			return NOTIFY_DONE;

		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(dev);
		if (!mlxsw_sp_port)
			return NOTIFY_DONE;

		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
		dip = ntohl(*((__be32 *) n->primary_key));
		neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
		if (WARN_ON(!neigh_entry)) {
			mlxsw_sp_port_dev_put(mlxsw_sp_port);
			return NOTIFY_DONE;
		}
		neigh_entry->mlxsw_sp_port = mlxsw_sp_port;

		/* Take a reference to ensure the neighbour won't be
		 * destructed until we drop the reference in delayed
		 * work.
		 */
		neigh_clone(n);
		if (!mlxsw_core_schedule_dw(&neigh_entry->dw, 0)) {
			neigh_release(n);
			mlxsw_sp_port_dev_put(mlxsw_sp_port);
		}
		break;
	}

	return NOTIFY_DONE;
}

static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
{
	int err;

	err = rhashtable_init(&mlxsw_sp->router.neigh_ht,
			      &mlxsw_sp_neigh_ht_params);
	if (err)
		return err;

	/* Initialize the polling interval according to the default
	 * table.
	 */
	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);

	/* Create the delayed works for the activity_update */
	INIT_DELAYED_WORK(&mlxsw_sp->router.neighs_update.dw,
			  mlxsw_sp_router_neighs_update_work);
	INIT_DELAYED_WORK(&mlxsw_sp->router.nexthop_probe_dw,
			  mlxsw_sp_router_probe_unresolved_nexthops);
	mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, 0);
	mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, 0);
	return 0;
}

static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
{
	cancel_delayed_work_sync(&mlxsw_sp->router.neighs_update.dw);
	cancel_delayed_work_sync(&mlxsw_sp->router.nexthop_probe_dw);
	rhashtable_destroy(&mlxsw_sp->router.neigh_ht);
}

struct mlxsw_sp_nexthop {
	struct list_head neigh_list_node; /* member of neigh entry list */
	struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
						* this belongs to
						*/
	u8 should_offload:1, /* set indicates this neigh is connected and
			      * should be put to KVD linear area of this
			      * group.
			      */
	   offloaded:1, /* set in case the neigh is actually put into
			 * KVD linear area of this group.
			 */
	   update:1; /* set indicates that MAC of this neigh should be
		      * updated in HW
		      */
	struct mlxsw_sp_neigh_entry *neigh_entry;
};

struct mlxsw_sp_nexthop_group {
	struct list_head list; /* node in mlxsw->router.nexthop_group_list */
	struct list_head fib_list; /* list of fib entries that use this group */
	u8 adj_index_valid:1;
	u32 adj_index;
	u16 ecmp_size;
	u16 count;
	struct mlxsw_sp_nexthop nexthops[0];
};

static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
					     struct mlxsw_sp_vr *vr,
					     u32 adj_index, u16 ecmp_size,
					     u32 new_adj_index,
					     u16 new_ecmp_size)
{
	char raleu_pl[MLXSW_REG_RALEU_LEN];

	mlxsw_reg_raleu_pack(raleu_pl,
			     (enum mlxsw_reg_ralxx_protocol) vr->proto, vr->id,
			     adj_index, ecmp_size, new_adj_index,
			     new_ecmp_size);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
}

static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_nexthop_group *nh_grp,
					  u32 old_adj_index, u16 old_ecmp_size)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_vr *vr = NULL;
	int err;

	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
		if (vr == fib_entry->vr)
			continue;
		vr = fib_entry->vr;
		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr,
							old_adj_index,
							old_ecmp_size,
							nh_grp->adj_index,
							nh_grp->ecmp_size);
		if (err)
			return err;
	}
	return 0;
}

static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
				       struct mlxsw_sp_nexthop *nh)
{
	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
	char ratr_pl[MLXSW_REG_RATR_LEN];

	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
			    true, adj_index, neigh_entry->rif);
	mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
}

static int
mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_nexthop_group *nh_grp)
{
	u32 adj_index = nh_grp->adj_index; /* base */
	struct mlxsw_sp_nexthop *nh;
	int i;
	int err;

	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];

		if (!nh->should_offload) {
			nh->offloaded = 0;
			continue;
		}

		if (nh->update) {
			err = mlxsw_sp_nexthop_mac_update(mlxsw_sp,
							  adj_index, nh);
			if (err)
				return err;
			nh->update = 0;
			nh->offloaded = 1;
		}
		adj_index++;
	}
	return 0;
}

static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_entry *fib_entry);

static int
mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	int err;

	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
		if (err)
			return err;
	}
	return 0;
}
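
/* Re-evaluate which nexthops of the group should be offloaded. If group
 * membership changed, allocate a new KVD linear block sized to the number
 * of resolved nexthops, write their MACs and re-point the fib entries
 * using the group (or mass-update the adjacency range if one was already
 * valid). Any failure falls back to trapping the group's traffic to the
 * CPU.
 */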
static void
mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop *nh;
	bool offload_change = false;
	u32 adj_index;
	u16 ecmp_size = 0;
	bool old_adj_index_valid;
	u32 old_adj_index;
	u16 old_ecmp_size;
	int ret;
	int i;
	int err;

	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];

		if (nh->should_offload ^ nh->offloaded) {
			offload_change = true;
			if (nh->should_offload)
				nh->update = 1;
		}
		if (nh->should_offload)
			ecmp_size++;
	}
	if (!offload_change) {
		/* Nothing was added or removed, so no need to reallocate. Just
		 * update MAC on existing adjacency indexes.
		 */
		err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
			goto set_trap;
		}
		return;
	}
	if (!ecmp_size)
		/* No neigh of this group is connected so we just set
		 * the trap and let everything flow through kernel.
		 */
		goto set_trap;

	ret = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size);
	if (ret < 0) {
		/* We ran out of KVD linear space, just set the
		 * trap and let everything flow through kernel.
		 */
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
		goto set_trap;
	}
	adj_index = ret;
	old_adj_index_valid = nh_grp->adj_index_valid;
	old_adj_index = nh_grp->adj_index;
	old_ecmp_size = nh_grp->ecmp_size;
	nh_grp->adj_index_valid = 1;
	nh_grp->adj_index = adj_index;
	nh_grp->ecmp_size = ecmp_size;
	err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
		goto set_trap;
	}

	if (!old_adj_index_valid) {
		/* The trap was set for fib entries, so we have to call
		 * fib entry update to unset it and use adjacency index.
		 */
		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
			goto set_trap;
		}
		return;
	}

	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
					     old_adj_index, old_ecmp_size);
	mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
		goto set_trap;
	}
	return;

set_trap:
	old_adj_index_valid = nh_grp->adj_index_valid;
	nh_grp->adj_index_valid = 0;
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		nh->offloaded = 0;
	}
	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
	if (err)
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
	if (old_adj_index_valid)
		mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
}

static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
					    bool removing)
{
	if (!removing && !nh->should_offload)
		nh->should_offload = 1;
	else if (removing && nh->offloaded)
		nh->should_offload = 0;
	nh->update = 1;
}

static void
mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
			      struct mlxsw_sp_neigh_entry *neigh_entry,
			      bool removing)
{
	struct mlxsw_sp_nexthop *nh;

	/* Take RTNL mutex here to prevent the lists from changing */
	rtnl_lock();
	list_for_each_entry(nh, &neigh_entry->nexthop_list,
			    neigh_list_node) {
		__mlxsw_sp_nexthop_neigh_update(nh, removing);
		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
	}
	rtnl_unlock();
}

static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_nexthop_group *nh_grp,
				 struct mlxsw_sp_nexthop *nh,
				 struct fib_nh *fib_nh)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct net_device *dev = fib_nh->nh_dev;
	struct neighbour *n;
	u8 nud_state, dead;

	/* Take a reference of neigh here ensuring that neigh would
	 * not be destructed before the nexthop entry is finished.
	 * The reference is taken either in neigh_lookup() or
	 * in neigh_create() in case n is not found.
	 */
	n = neigh_lookup(&arp_tbl, &fib_nh->nh_gw, dev);
	if (!n) {
		n = neigh_create(&arp_tbl, &fib_nh->nh_gw, dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
		neigh_event_send(n, NULL);
	}
	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
	if (!neigh_entry) {
		neigh_release(n);
		return -EINVAL;
	}

	/* If that is the first nexthop connected to that neigh, add to
	 * nexthop_neighs_list
	 */
	if (list_empty(&neigh_entry->nexthop_list))
		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
			      &mlxsw_sp->router.nexthop_neighs_list);

	nh->nh_grp = nh_grp;
	nh->neigh_entry = neigh_entry;
	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
	read_lock_bh(&n->lock);
	nud_state = n->nud_state;
	dead = n->dead;
	read_unlock_bh(&n->lock);
	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));

	return 0;
}

static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_nexthop *nh)
{
	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;

	__mlxsw_sp_nexthop_neigh_update(nh, true);
	list_del(&nh->neigh_list_node);

	/* If that is the last nexthop connected to that neigh, remove from
	 * nexthop_neighs_list
	 */
	if (list_empty(&nh->neigh_entry->nexthop_list))
		list_del(&nh->neigh_entry->nexthop_neighs_list_node);

	neigh_release(neigh_entry->key.n);
}

static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
{
	struct mlxsw_sp_nexthop_group *nh_grp;
	struct mlxsw_sp_nexthop *nh;
	struct fib_nh *fib_nh;
	size_t alloc_size;
	int i;
	int err;

	alloc_size = sizeof(*nh_grp) +
		     fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
	if (!nh_grp)
		return ERR_PTR(-ENOMEM);
	INIT_LIST_HEAD(&nh_grp->fib_list);
	nh_grp->count = fi->fib_nhs;
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		fib_nh = &fi->fib_nh[i];
		err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh);
		if (err)
			goto err_nexthop_init;
	}
	list_add_tail(&nh_grp->list, &mlxsw_sp->router.nexthop_group_list);
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	return nh_grp;

err_nexthop_init:
	/* Unwind only the nexthops that were initialized, re-indexing
	 * nh on each iteration.
	 */
	for (i--; i >= 0; i--) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
	}
	kfree(nh_grp);
	return ERR_PTR(err);
}

static void
mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop *nh;
	int i;

	list_del(&nh_grp->list);
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
	}
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	WARN_ON_ONCE(nh_grp->adj_index_valid);
	kfree(nh_grp);
}

static bool mlxsw_sp_nexthop_match(struct mlxsw_sp_nexthop *nh,
				   struct fib_info *fi)
{
	int i;

	for (i = 0; i < fi->fib_nhs; i++) {
		struct fib_nh *fib_nh = &fi->fib_nh[i];
		struct neighbour *n = nh->neigh_entry->key.n;

		if (memcmp(n->primary_key, &fib_nh->nh_gw,
			   sizeof(fib_nh->nh_gw)) == 0 &&
		    n->dev == fib_nh->nh_dev)
			return true;
	}
	return false;
}

static bool mlxsw_sp_nexthop_group_match(struct mlxsw_sp_nexthop_group *nh_grp,
					 struct fib_info *fi)
{
	int i;

	if (nh_grp->count != fi->fib_nhs)
		return false;
	for (i = 0; i < nh_grp->count; i++) {
		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];

		if (!mlxsw_sp_nexthop_match(nh, fi))
			return false;
	}
	return true;
}

static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop_group_find(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
{
	struct mlxsw_sp_nexthop_group *nh_grp;

	list_for_each_entry(nh_grp, &mlxsw_sp->router.nexthop_group_list,
			    list) {
		if (mlxsw_sp_nexthop_group_match(nh_grp, fi))
			return nh_grp;
	}
	return NULL;
}

static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_fib_entry *fib_entry,
				      struct fib_info *fi)
{
	struct mlxsw_sp_nexthop_group *nh_grp;

	nh_grp = mlxsw_sp_nexthop_group_find(mlxsw_sp, fi);
	if (!nh_grp) {
		nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi);
		if (IS_ERR(nh_grp))
			return PTR_ERR(nh_grp);
	}
	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
	fib_entry->nh_group = nh_grp;
	return 0;
}

static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;

	list_del(&fib_entry->nexthop_group_node);
	if (!list_empty(&nh_grp->fib_list))
		return;
	mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp);
}

static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib_entry *fib_entry,
					 enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	u32 *p_dip = (u32 *) fib_entry->key.addr;
	struct mlxsw_sp_vr *vr = fib_entry->vr;
	enum mlxsw_reg_ralue_trap_action trap_action;
	u16 trap_id = 0;
	u32 adjacency_index = 0;
	u16 ecmp_size = 0;

	/* In case the nexthop group adjacency index is valid, use it
	 * with provided ECMP size. Otherwise, set up a trap and pass
	 * traffic to kernel.
	 */
	if (fib_entry->nh_group->adj_index_valid) {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
		adjacency_index = fib_entry->nh_group->adj_index;
		ecmp_size = fib_entry->nh_group->ecmp_size;
	} else {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
	}

	mlxsw_reg_ralue_pack4(ralue_pl,
			      (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
			      vr->id, fib_entry->key.prefix_len, *p_dip);
	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
					adjacency_index, ecmp_size);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}

static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry,
					enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	u32 *p_dip = (u32 *) fib_entry->key.addr;
	struct mlxsw_sp_vr *vr = fib_entry->vr;

	mlxsw_reg_ralue_pack4(ralue_pl,
			      (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
			      vr->id, fib_entry->key.prefix_len, *p_dip);
	mlxsw_reg_ralue_act_local_pack(ralue_pl,
				       MLXSW_REG_RALUE_TRAP_ACTION_NOP, 0,
				       fib_entry->rif);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}

static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry *fib_entry,
				       enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	u32 *p_dip = (u32 *) fib_entry->key.addr;
	struct mlxsw_sp_vr *vr = fib_entry->vr;

	mlxsw_reg_ralue_pack4(ralue_pl,
			      (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
			      vr->id, fib_entry->key.prefix_len, *p_dip);
	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}

static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_entry *fib_entry,
				  enum mlxsw_reg_ralue_op op)
{
	switch (fib_entry->type) {
	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
		return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
		return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
		return mlxsw_sp_fib_entry_op4_trap(mlxsw_sp, fib_entry, op);
	}
	return -EINVAL;
}

static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib_entry *fib_entry,
				 enum mlxsw_reg_ralue_op op)
{
	switch (fib_entry->vr->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		return mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op);
	case MLXSW_SP_L3_PROTO_IPV6:
		return -EINVAL;
	}
	return -EINVAL;
}

static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_entry *fib_entry)
{
	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
}

static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_entry *fib_entry)
{
	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
}

static int
mlxsw_sp_router_fib4_entry_init(struct mlxsw_sp *mlxsw_sp,
				const struct fib_entry_notifier_info *fen_info,
				struct mlxsw_sp_fib_entry *fib_entry)
{
	struct fib_info *fi = fen_info->fi;
	struct mlxsw_sp_rif *r = NULL;
	int nhsel;
	int err;

	if (fen_info->type == RTN_LOCAL || fen_info->type == RTN_BROADCAST) {
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
		return 0;
	}
	if (fen_info->type != RTN_UNICAST)
		return -EINVAL;

	for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
		const struct fib_nh *nh = &fi->fib_nh[nhsel];

		if (!nh->nh_dev)
			continue;
		r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, nh->nh_dev);
		if (!r) {
			/* In case router interface is not found for
			 * at least one of the nexthops, that means
			 * the nexthop points to some device unrelated
			 * to us. Set trap and pass the packets for
			 * this prefix to kernel.
			 */
			break;
		}
	}

	if (!r) {
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
		return 0;
	}

	if (fi->fib_scope != RT_SCOPE_UNIVERSE) {
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
		fib_entry->rif = r->rif;
	} else {
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
		err = mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fi);
		if (err)
			return err;
	}
	fib_info_offload_inc(fen_info->fi);
	return 0;
}

static void
mlxsw_sp_router_fib4_entry_fini(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib_entry *fib_entry)
{
	if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
		fib_info_offload_dec(fib_entry->fi);
	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_REMOTE)
		mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry);
}

static struct mlxsw_sp_fib_entry *
mlxsw_sp_fib_entry_get(struct mlxsw_sp *mlxsw_sp,
		       const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	struct fib_info *fi = fen_info->fi;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->dst_len, fen_info->tb_id,
			     MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(vr))
		return ERR_CAST(vr);

	fib_entry = mlxsw_sp_fib_entry_lookup(vr->fib, &fen_info->dst,
					      sizeof(fen_info->dst),
					      fen_info->dst_len, fi->fib_dev);
	if (fib_entry) {
		/* Already exists, just take a reference */
		fib_entry->ref_count++;
		return fib_entry;
	}
	fib_entry = mlxsw_sp_fib_entry_create(vr->fib, &fen_info->dst,
					      sizeof(fen_info->dst),
					      fen_info->dst_len, fi->fib_dev);
	if (!fib_entry) {
		err = -ENOMEM;
		goto err_fib_entry_create;
	}
	fib_entry->vr = vr;
	fib_entry->fi = fi;
	fib_entry->ref_count = 1;

	err = mlxsw_sp_router_fib4_entry_init(mlxsw_sp, fen_info, fib_entry);
	if (err)
		goto err_fib4_entry_init;

	return fib_entry;

err_fib4_entry_init:
	mlxsw_sp_fib_entry_destroy(fib_entry);
err_fib_entry_create:
	mlxsw_sp_vr_put(mlxsw_sp, vr);

	return ERR_PTR(err);
}

static struct mlxsw_sp_fib_entry *
mlxsw_sp_fib_entry_find(struct mlxsw_sp *mlxsw_sp,
			const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_vr *vr;

	vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id,
			      MLXSW_SP_L3_PROTO_IPV4);
	if (!vr)
		return NULL;

	return mlxsw_sp_fib_entry_lookup(vr->fib, &fen_info->dst,
					 sizeof(fen_info->dst),
					 fen_info->dst_len,
					 fen_info->fi->fib_dev);
}
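
/* Drop a reference to the fib entry. On the last reference the offload
 * state (fib_info offload counter and nexthop group) is released before
 * the entry is freed; the VR reference is dropped in any case, possibly
 * destroying the VR or shrinking its LPM tree.
 */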
static void mlxsw_sp_fib_entry_put(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_vr *vr = fib_entry->vr;

	if (--fib_entry->ref_count == 0) {
		mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry);
		mlxsw_sp_fib_entry_destroy(fib_entry);
	}
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}

static void mlxsw_sp_fib_entry_put_all(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry *fib_entry)
{
	unsigned int last_ref_count;

	do {
		last_ref_count = fib_entry->ref_count;
		mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry);
	} while (last_ref_count != 1);
}

static int mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
				    struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_vr *vr;
	int err;

	if (mlxsw_sp->router.aborted)
		return 0;

	fib_entry = mlxsw_sp_fib_entry_get(mlxsw_sp, fen_info);
	if (IS_ERR(fib_entry)) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB4 entry being added.\n");
		return PTR_ERR(fib_entry);
	}

	if (fib_entry->ref_count != 1)
		return 0;

	vr = fib_entry->vr;
	err = mlxsw_sp_fib_entry_insert(vr->fib, fib_entry);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to insert FIB4 entry being added.\n");
		goto err_fib_entry_insert;
	}
	err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
	if (err)
		goto err_fib_entry_add;
	return 0;

err_fib_entry_add:
	mlxsw_sp_fib_entry_remove(vr->fib, fib_entry);
err_fib_entry_insert:
	mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry);
	return err;
}

static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
				     struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib_entry *fib_entry;

	if (mlxsw_sp->router.aborted)
		return;

	fib_entry = mlxsw_sp_fib_entry_find(mlxsw_sp, fen_info);
	if (!fib_entry)
		return;

	if (fib_entry->ref_count == 1) {
		mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
		mlxsw_sp_fib_entry_remove(fib_entry->vr->fib, fib_entry);
	}

	mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry);
}
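
/* In abort mode, trap all routed packets to the CPU: allocate the first
 * usable LPM tree, bind virtual router 0 to it and install a 0/0 default
 * entry with an ip2me (trap) action.
 */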
static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];
	char ralst_pl[MLXSW_REG_RALST_LEN];
	char raltb_pl[MLXSW_REG_RALTB_LEN];
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	int err;

	mlxsw_reg_ralta_pack(ralta_pl, true, MLXSW_REG_RALXX_PROTOCOL_IPV4,
			     MLXSW_SP_LPM_TREE_MIN);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
	if (err)
		return err;

	mlxsw_reg_ralst_pack(ralst_pl, 0xff, MLXSW_SP_LPM_TREE_MIN);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
	if (err)
		return err;

	mlxsw_reg_raltb_pack(raltb_pl, 0, MLXSW_REG_RALXX_PROTOCOL_IPV4,
			     MLXSW_SP_LPM_TREE_MIN);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
	if (err)
		return err;

	mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4,
			      MLXSW_REG_RALUE_OP_WRITE_WRITE, 0, 0, 0);
	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}

static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_fib_entry *tmp;
	struct mlxsw_sp_vr *vr;
	int i;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router.vrs[i];

		if (!vr->used)
			continue;

		list_for_each_entry_safe(fib_entry, tmp,
					 &vr->fib->entry_list, list) {
			bool do_break = &tmp->list == &vr->fib->entry_list;

			mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
			mlxsw_sp_fib_entry_remove(fib_entry->vr->fib,
						  fib_entry);
			mlxsw_sp_fib_entry_put_all(mlxsw_sp, fib_entry);
			if (do_break)
				break;
		}
	}
}

static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp)
{
	int err;

	if (mlxsw_sp->router.aborted)
		return;
	dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
	mlxsw_sp_router_fib_flush(mlxsw_sp);
	mlxsw_sp->router.aborted = true;
	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
	if (err)
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
}

static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
	char rgcr_pl[MLXSW_REG_RGCR_LEN];
	u64 max_rifs;
	int err;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
		return -EIO;

	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
	mlxsw_sp->rifs = kcalloc(max_rifs, sizeof(struct mlxsw_sp_rif *),
				 GFP_KERNEL);
	if (!mlxsw_sp->rifs)
		return -ENOMEM;

	mlxsw_reg_rgcr_pack(rgcr_pl, true);
	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
	if (err)
		goto err_rgcr_fail;

	return 0;

err_rgcr_fail:
	kfree(mlxsw_sp->rifs);
	return err;
}

static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	char rgcr_pl[MLXSW_REG_RGCR_LEN];
	int i;

	mlxsw_reg_rgcr_pack(rgcr_pl, false);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
		WARN_ON_ONCE(mlxsw_sp->rifs[i]);

	kfree(mlxsw_sp->rifs);
}
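
/* FIB notifications arrive in atomic context, so the event and its
 * notifier info are copied into a work item and processed under RTNL.
 */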
struct mlxsw_sp_fib_event_work {
	struct delayed_work dw;
	struct fib_entry_notifier_info fen_info;
	struct mlxsw_sp *mlxsw_sp;
	unsigned long event;
};

static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
{
	struct mlxsw_sp_fib_event_work *fib_work =
		container_of(work, struct mlxsw_sp_fib_event_work, dw.work);
	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
	int err;

	/* Protect internal structures from changes */
	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_ADD:
		err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info);
		if (err)
			mlxsw_sp_router_fib4_abort(mlxsw_sp);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_ENTRY_DEL:
		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_RULE_ADD: /* fall through */
	case FIB_EVENT_RULE_DEL:
		mlxsw_sp_router_fib4_abort(mlxsw_sp);
		break;
	}
	rtnl_unlock();
	kfree(fib_work);
}

/* Called with rcu_read_lock() */
static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
				     unsigned long event, void *ptr)
{
	struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
	struct mlxsw_sp_fib_event_work *fib_work;
	struct fib_notifier_info *info = ptr;

	if (!net_eq(info->net, &init_net))
		return NOTIFY_DONE;

	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
	if (WARN_ON(!fib_work))
		return NOTIFY_BAD;

	INIT_DELAYED_WORK(&fib_work->dw, mlxsw_sp_router_fib_event_work);
	fib_work->mlxsw_sp = mlxsw_sp;
	fib_work->event = event;

	switch (event) {
	case FIB_EVENT_ENTRY_ADD: /* fall through */
	case FIB_EVENT_ENTRY_DEL:
		memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info));
		/* Take a reference on fib_info to prevent it from being
		 * freed while work is queued. Release it afterwards.
		 */
		fib_info_hold(fib_work->fen_info.fi);
		break;
	}

	mlxsw_core_schedule_odw(&fib_work->dw, 0);

	return NOTIFY_DONE;
}

static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
{
	struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);

	/* Flush pending FIB notifications and then flush the device's
	 * table before requesting another dump. The FIB notification
	 * block is unregistered, so no need to take RTNL.
	 */
	mlxsw_core_flush_owq();
	mlxsw_sp_router_fib_flush(mlxsw_sp);
}

int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
	int err;

	INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list);
	INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_group_list);
	err = __mlxsw_sp_router_init(mlxsw_sp);
	if (err)
		return err;

	mlxsw_sp_lpm_init(mlxsw_sp);
	err = mlxsw_sp_vrs_init(mlxsw_sp);
	if (err)
		goto err_vrs_init;

	err = mlxsw_sp_neigh_init(mlxsw_sp);
	if (err)
		goto err_neigh_init;

	mlxsw_sp->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
	err = register_fib_notifier(&mlxsw_sp->fib_nb,
				    mlxsw_sp_router_fib_dump_flush);
	if (err)
		goto err_register_fib_notifier;

	return 0;

err_register_fib_notifier:
	mlxsw_sp_neigh_fini(mlxsw_sp);
err_neigh_init:
	mlxsw_sp_vrs_fini(mlxsw_sp);
err_vrs_init:
	__mlxsw_sp_router_fini(mlxsw_sp);
	return err;
}

void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	unregister_fib_notifier(&mlxsw_sp->fib_nb);
	mlxsw_sp_neigh_fini(mlxsw_sp);
	mlxsw_sp_vrs_fini(mlxsw_sp);
	__mlxsw_sp_router_fini(mlxsw_sp);
}