1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* 3 * Generic nexthop implementation 4 * 5 * Copyright (c) 2017-19 Cumulus Networks 6 * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com> 7 */ 8 9 #ifndef __LINUX_NEXTHOP_H 10 #define __LINUX_NEXTHOP_H 11 12 #include <linux/netdevice.h> 13 #include <linux/notifier.h> 14 #include <linux/route.h> 15 #include <linux/types.h> 16 #include <net/ip_fib.h> 17 #include <net/ip6_fib.h> 18 #include <net/netlink.h> 19 20 #define NEXTHOP_VALID_USER_FLAGS RTNH_F_ONLINK 21 22 struct nexthop; 23 24 struct nh_config { 25 u32 nh_id; 26 27 u8 nh_family; 28 u8 nh_protocol; 29 u8 nh_blackhole; 30 u8 nh_fdb; 31 u32 nh_flags; 32 33 int nh_ifindex; 34 struct net_device *dev; 35 36 union { 37 __be32 ipv4; 38 struct in6_addr ipv6; 39 } gw; 40 41 struct nlattr *nh_grp; 42 u16 nh_grp_type; 43 u16 nh_grp_res_num_buckets; 44 unsigned long nh_grp_res_idle_timer; 45 unsigned long nh_grp_res_unbalanced_timer; 46 bool nh_grp_res_has_num_buckets; 47 bool nh_grp_res_has_idle_timer; 48 bool nh_grp_res_has_unbalanced_timer; 49 50 struct nlattr *nh_encap; 51 u16 nh_encap_type; 52 53 u32 nlflags; 54 struct nl_info nlinfo; 55 }; 56 57 struct nh_info { 58 struct hlist_node dev_hash; /* entry on netns devhash */ 59 struct nexthop *nh_parent; 60 61 u8 family; 62 bool reject_nh; 63 bool fdb_nh; 64 65 union { 66 struct fib_nh_common fib_nhc; 67 struct fib_nh fib_nh; 68 struct fib6_nh fib6_nh; 69 }; 70 }; 71 72 struct nh_res_bucket { 73 struct nh_grp_entry __rcu *nh_entry; 74 atomic_long_t used_time; 75 unsigned long migrated_time; 76 bool occupied; 77 u8 nh_flags; 78 }; 79 80 struct nh_res_table { 81 struct net *net; 82 u32 nhg_id; 83 struct delayed_work upkeep_dw; 84 85 /* List of NHGEs that have too few buckets ("uw" for underweight). 86 * Reclaimed buckets will be given to entries in this list. 87 */ 88 struct list_head uw_nh_entries; 89 unsigned long unbalanced_since; 90 91 u32 idle_timer; 92 u32 unbalanced_timer; 93 94 u16 num_nh_buckets; 95 struct nh_res_bucket nh_buckets[]; 96 }; 97 98 struct nh_grp_entry { 99 struct nexthop *nh; 100 u8 weight; 101 102 union { 103 struct { 104 atomic_t upper_bound; 105 } hthr; 106 struct { 107 /* Member on uw_nh_entries. */ 108 struct list_head uw_nh_entry; 109 110 u16 count_buckets; 111 u16 wants_buckets; 112 } res; 113 }; 114 115 struct list_head nh_list; 116 struct nexthop *nh_parent; /* nexthop of group with this entry */ 117 }; 118 119 struct nh_group { 120 struct nh_group *spare; /* spare group for removals */ 121 u16 num_nh; 122 bool is_multipath; 123 bool hash_threshold; 124 bool resilient; 125 bool fdb_nh; 126 bool has_v4; 127 128 struct nh_res_table __rcu *res_table; 129 struct nh_grp_entry nh_entries[]; 130 }; 131 132 struct nexthop { 133 struct rb_node rb_node; /* entry on netns rbtree */ 134 struct list_head fi_list; /* v4 entries using nh */ 135 struct list_head f6i_list; /* v6 entries using nh */ 136 struct list_head fdb_list; /* fdb entries using this nh */ 137 struct list_head grp_list; /* nh group entries using this nh */ 138 struct net *net; 139 140 u32 id; 141 142 u8 protocol; /* app managing this nh */ 143 u8 nh_flags; 144 bool is_group; 145 146 refcount_t refcnt; 147 struct rcu_head rcu; 148 149 union { 150 struct nh_info __rcu *nh_info; 151 struct nh_group __rcu *nh_grp; 152 }; 153 }; 154 155 enum nexthop_event_type { 156 NEXTHOP_EVENT_DEL, 157 NEXTHOP_EVENT_REPLACE, 158 NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE, 159 NEXTHOP_EVENT_BUCKET_REPLACE, 160 }; 161 162 enum nh_notifier_info_type { 163 NH_NOTIFIER_INFO_TYPE_SINGLE, 164 NH_NOTIFIER_INFO_TYPE_GRP, 165 NH_NOTIFIER_INFO_TYPE_RES_TABLE, 166 NH_NOTIFIER_INFO_TYPE_RES_BUCKET, 167 }; 168 169 struct nh_notifier_single_info { 170 struct net_device *dev; 171 u8 gw_family; 172 union { 173 __be32 ipv4; 174 struct in6_addr ipv6; 175 }; 176 u8 is_reject:1, 177 is_fdb:1, 178 has_encap:1; 179 }; 180 181 struct nh_notifier_grp_entry_info { 182 u8 weight; 183 u32 id; 184 struct nh_notifier_single_info nh; 185 }; 186 187 struct nh_notifier_grp_info { 188 u16 num_nh; 189 bool is_fdb; 190 struct nh_notifier_grp_entry_info nh_entries[]; 191 }; 192 193 struct nh_notifier_res_bucket_info { 194 u16 bucket_index; 195 unsigned int idle_timer_ms; 196 bool force; 197 struct nh_notifier_single_info old_nh; 198 struct nh_notifier_single_info new_nh; 199 }; 200 201 struct nh_notifier_res_table_info { 202 u16 num_nh_buckets; 203 struct nh_notifier_single_info nhs[]; 204 }; 205 206 struct nh_notifier_info { 207 struct net *net; 208 struct netlink_ext_ack *extack; 209 u32 id; 210 enum nh_notifier_info_type type; 211 union { 212 struct nh_notifier_single_info *nh; 213 struct nh_notifier_grp_info *nh_grp; 214 struct nh_notifier_res_table_info *nh_res_table; 215 struct nh_notifier_res_bucket_info *nh_res_bucket; 216 }; 217 }; 218 219 int register_nexthop_notifier(struct net *net, struct notifier_block *nb, 220 struct netlink_ext_ack *extack); 221 int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb); 222 void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap); 223 void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index, 224 bool offload, bool trap); 225 void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets, 226 unsigned long *activity); 227 228 /* caller is holding rcu or rtnl; no reference taken to nexthop */ 229 struct nexthop *nexthop_find_by_id(struct net *net, u32 id); 230 void nexthop_free_rcu(struct rcu_head *head); 231 232 static inline bool nexthop_get(struct nexthop *nh) 233 { 234 return refcount_inc_not_zero(&nh->refcnt); 235 } 236 237 static inline void nexthop_put(struct nexthop *nh) 238 { 239 if (refcount_dec_and_test(&nh->refcnt)) 240 call_rcu(&nh->rcu, nexthop_free_rcu); 241 } 242 243 static inline bool nexthop_cmp(const struct nexthop *nh1, 244 const struct nexthop *nh2) 245 { 246 return nh1 == nh2; 247 } 248 249 static inline bool nexthop_is_fdb(const struct nexthop *nh) 250 { 251 if (nh->is_group) { 252 const struct nh_group *nh_grp; 253 254 nh_grp = rcu_dereference_rtnl(nh->nh_grp); 255 return nh_grp->fdb_nh; 256 } else { 257 const struct nh_info *nhi; 258 259 nhi = rcu_dereference_rtnl(nh->nh_info); 260 return nhi->fdb_nh; 261 } 262 } 263 264 static inline bool nexthop_has_v4(const struct nexthop *nh) 265 { 266 if (nh->is_group) { 267 struct nh_group *nh_grp; 268 269 nh_grp = rcu_dereference_rtnl(nh->nh_grp); 270 return nh_grp->has_v4; 271 } 272 return false; 273 } 274 275 static inline bool nexthop_is_multipath(const struct nexthop *nh) 276 { 277 if (nh->is_group) { 278 struct nh_group *nh_grp; 279 280 nh_grp = rcu_dereference_rtnl(nh->nh_grp); 281 return nh_grp->is_multipath; 282 } 283 return false; 284 } 285 286 struct nexthop *nexthop_select_path(struct nexthop *nh, int hash); 287 288 static inline unsigned int nexthop_num_path(const struct nexthop *nh) 289 { 290 unsigned int rc = 1; 291 292 if (nh->is_group) { 293 struct nh_group *nh_grp; 294 295 nh_grp = rcu_dereference_rtnl(nh->nh_grp); 296 if (nh_grp->is_multipath) 297 rc = nh_grp->num_nh; 298 } 299 300 return rc; 301 } 302 303 static inline 304 struct nexthop *nexthop_mpath_select(const struct nh_group *nhg, int nhsel) 305 { 306 /* for_nexthops macros in fib_semantics.c grabs a pointer to 307 * the nexthop before checking nhsel 308 */ 309 if (nhsel >= nhg->num_nh) 310 return NULL; 311 312 return nhg->nh_entries[nhsel].nh; 313 } 314 315 static inline 316 int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh, 317 u8 rt_family) 318 { 319 struct nh_group *nhg = rtnl_dereference(nh->nh_grp); 320 int i; 321 322 for (i = 0; i < nhg->num_nh; i++) { 323 struct nexthop *nhe = nhg->nh_entries[i].nh; 324 struct nh_info *nhi = rcu_dereference_rtnl(nhe->nh_info); 325 struct fib_nh_common *nhc = &nhi->fib_nhc; 326 int weight = nhg->nh_entries[i].weight; 327 328 if (fib_add_nexthop(skb, nhc, weight, rt_family, 0) < 0) 329 return -EMSGSIZE; 330 } 331 332 return 0; 333 } 334 335 /* called with rcu lock */ 336 static inline bool nexthop_is_blackhole(const struct nexthop *nh) 337 { 338 const struct nh_info *nhi; 339 340 if (nh->is_group) { 341 struct nh_group *nh_grp; 342 343 nh_grp = rcu_dereference_rtnl(nh->nh_grp); 344 if (nh_grp->num_nh > 1) 345 return false; 346 347 nh = nh_grp->nh_entries[0].nh; 348 } 349 350 nhi = rcu_dereference_rtnl(nh->nh_info); 351 return nhi->reject_nh; 352 } 353 354 static inline void nexthop_path_fib_result(struct fib_result *res, int hash) 355 { 356 struct nh_info *nhi; 357 struct nexthop *nh; 358 359 nh = nexthop_select_path(res->fi->nh, hash); 360 nhi = rcu_dereference(nh->nh_info); 361 res->nhc = &nhi->fib_nhc; 362 } 363 364 /* called with rcu read lock or rtnl held */ 365 static inline 366 struct fib_nh_common *nexthop_fib_nhc(struct nexthop *nh, int nhsel) 367 { 368 struct nh_info *nhi; 369 370 BUILD_BUG_ON(offsetof(struct fib_nh, nh_common) != 0); 371 BUILD_BUG_ON(offsetof(struct fib6_nh, nh_common) != 0); 372 373 if (nh->is_group) { 374 struct nh_group *nh_grp; 375 376 nh_grp = rcu_dereference_rtnl(nh->nh_grp); 377 if (nh_grp->is_multipath) { 378 nh = nexthop_mpath_select(nh_grp, nhsel); 379 if (!nh) 380 return NULL; 381 } 382 } 383 384 nhi = rcu_dereference_rtnl(nh->nh_info); 385 return &nhi->fib_nhc; 386 } 387 388 /* called from fib_table_lookup with rcu_lock */ 389 static inline 390 struct fib_nh_common *nexthop_get_nhc_lookup(const struct nexthop *nh, 391 int fib_flags, 392 const struct flowi4 *flp, 393 int *nhsel) 394 { 395 struct nh_info *nhi; 396 397 if (nh->is_group) { 398 struct nh_group *nhg = rcu_dereference(nh->nh_grp); 399 int i; 400 401 for (i = 0; i < nhg->num_nh; i++) { 402 struct nexthop *nhe = nhg->nh_entries[i].nh; 403 404 nhi = rcu_dereference(nhe->nh_info); 405 if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) { 406 *nhsel = i; 407 return &nhi->fib_nhc; 408 } 409 } 410 } else { 411 nhi = rcu_dereference(nh->nh_info); 412 if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) { 413 *nhsel = 0; 414 return &nhi->fib_nhc; 415 } 416 } 417 418 return NULL; 419 } 420 421 static inline bool nexthop_uses_dev(const struct nexthop *nh, 422 const struct net_device *dev) 423 { 424 struct nh_info *nhi; 425 426 if (nh->is_group) { 427 struct nh_group *nhg = rcu_dereference(nh->nh_grp); 428 int i; 429 430 for (i = 0; i < nhg->num_nh; i++) { 431 struct nexthop *nhe = nhg->nh_entries[i].nh; 432 433 nhi = rcu_dereference(nhe->nh_info); 434 if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev)) 435 return true; 436 } 437 } else { 438 nhi = rcu_dereference(nh->nh_info); 439 if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev)) 440 return true; 441 } 442 443 return false; 444 } 445 446 static inline unsigned int fib_info_num_path(const struct fib_info *fi) 447 { 448 if (unlikely(fi->nh)) 449 return nexthop_num_path(fi->nh); 450 451 return fi->fib_nhs; 452 } 453 454 int fib_check_nexthop(struct nexthop *nh, u8 scope, 455 struct netlink_ext_ack *extack); 456 457 static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel) 458 { 459 if (unlikely(fi->nh)) 460 return nexthop_fib_nhc(fi->nh, nhsel); 461 462 return &fi->fib_nh[nhsel].nh_common; 463 } 464 465 /* only used when fib_nh is built into fib_info */ 466 static inline struct fib_nh *fib_info_nh(struct fib_info *fi, int nhsel) 467 { 468 WARN_ON(fi->nh); 469 470 return &fi->fib_nh[nhsel]; 471 } 472 473 /* 474 * IPv6 variants 475 */ 476 int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg, 477 struct netlink_ext_ack *extack); 478 479 /* Caller should either hold rcu_read_lock(), or RTNL. */ 480 static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh) 481 { 482 struct nh_info *nhi; 483 484 if (nh->is_group) { 485 struct nh_group *nh_grp; 486 487 nh_grp = rcu_dereference_rtnl(nh->nh_grp); 488 nh = nexthop_mpath_select(nh_grp, 0); 489 if (!nh) 490 return NULL; 491 } 492 493 nhi = rcu_dereference_rtnl(nh->nh_info); 494 if (nhi->family == AF_INET6) 495 return &nhi->fib6_nh; 496 497 return NULL; 498 } 499 500 /* Variant of nexthop_fib6_nh(). 501 * Caller should either hold rcu_read_lock_bh(), or RTNL. 502 */ 503 static inline struct fib6_nh *nexthop_fib6_nh_bh(struct nexthop *nh) 504 { 505 struct nh_info *nhi; 506 507 if (nh->is_group) { 508 struct nh_group *nh_grp; 509 510 nh_grp = rcu_dereference_bh_rtnl(nh->nh_grp); 511 nh = nexthop_mpath_select(nh_grp, 0); 512 if (!nh) 513 return NULL; 514 } 515 516 nhi = rcu_dereference_bh_rtnl(nh->nh_info); 517 if (nhi->family == AF_INET6) 518 return &nhi->fib6_nh; 519 520 return NULL; 521 } 522 523 static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i) 524 { 525 struct fib6_nh *fib6_nh; 526 527 fib6_nh = f6i->nh ? nexthop_fib6_nh(f6i->nh) : f6i->fib6_nh; 528 return fib6_nh->fib_nh_dev; 529 } 530 531 static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash) 532 { 533 struct nexthop *nh = res->f6i->nh; 534 struct nh_info *nhi; 535 536 nh = nexthop_select_path(nh, hash); 537 538 nhi = rcu_dereference_rtnl(nh->nh_info); 539 if (nhi->reject_nh) { 540 res->fib6_type = RTN_BLACKHOLE; 541 res->fib6_flags |= RTF_REJECT; 542 res->nh = nexthop_fib6_nh(nh); 543 } else { 544 res->nh = &nhi->fib6_nh; 545 } 546 } 547 548 int nexthop_for_each_fib6_nh(struct nexthop *nh, 549 int (*cb)(struct fib6_nh *nh, void *arg), 550 void *arg); 551 552 static inline int nexthop_get_family(struct nexthop *nh) 553 { 554 struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info); 555 556 return nhi->family; 557 } 558 559 static inline 560 struct fib_nh_common *nexthop_fdb_nhc(struct nexthop *nh) 561 { 562 struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info); 563 564 return &nhi->fib_nhc; 565 } 566 567 static inline struct fib_nh_common *nexthop_path_fdb_result(struct nexthop *nh, 568 int hash) 569 { 570 struct nh_info *nhi; 571 struct nexthop *nhp; 572 573 nhp = nexthop_select_path(nh, hash); 574 if (unlikely(!nhp)) 575 return NULL; 576 nhi = rcu_dereference(nhp->nh_info); 577 return &nhi->fib_nhc; 578 } 579 #endif 580