// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021 Mellanox Technologies. */

#include <net/fib_notifier.h>
#include <net/nexthop.h>
#include "tc_tun_encap.h"
#include "en_tc.h"
#include "tc_tun.h"
#include "rep/tc.h"
#include "diag/en_tc_tracepoint.h"

enum {
	MLX5E_ROUTE_ENTRY_VALID = BIT(0),
};

static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv,
				     struct mlx5_flow_attr *attr,
				     struct mlx5e_encap_entry *e,
				     int out_index)
{
	struct net_device *route_dev;
	int err = 0;

	route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex);

	if (!route_dev || !netif_is_ovs_master(route_dev))
		goto out;

	err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex,
						MLX5E_TC_INT_PORT_EGRESS,
						&attr->action, out_index);

out:
	if (route_dev)
		dev_put(route_dev);

	return err;
}

struct mlx5e_route_key {
	int ip_version;
	union {
		__be32 v4;
		struct in6_addr v6;
	} endpoint_ip;
};

struct mlx5e_route_entry {
	struct mlx5e_route_key key;
	struct list_head encap_entries;
	struct list_head decap_flows;
	u32 flags;
	struct hlist_node hlist;
	refcount_t refcnt;
	int tunnel_dev_index;
	struct rcu_head rcu;
};

struct mlx5e_tc_tun_encap {
	struct mlx5e_priv *priv;
	struct notifier_block fib_nb;
	spinlock_t route_lock; /* protects route_tbl */
	unsigned long route_tbl_last_update;
	DECLARE_HASHTABLE(route_tbl, 8);
};

static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
{
	return r->flags & MLX5E_ROUTE_ENTRY_VALID;
}

int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
			     struct mlx5_flow_spec *spec)
{
	struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
	struct mlx5_rx_tun_attr *tun_attr;
	void *daddr, *saddr;
	u8 ip_version;

	tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
	if (!tun_attr)
		return -ENOMEM;

	esw_attr->rx_tun_attr = tun_attr;
	ip_version = mlx5e_tc_get_ip_version(spec, true);

	if (ip_version == 4) {
		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
		tun_attr->dst_ip.v4 = *(__be32 *)daddr;
		tun_attr->src_ip.v4 = *(__be32 *)saddr;
		if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
			return 0;
	}
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
	else if (ip_version == 6) {
		int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
		struct in6_addr zerov6 = {};

		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
		memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
		memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
		if (!memcmp(&tun_attr->dst_ip.v6, &zerov6, sizeof(zerov6)) ||
		    !memcmp(&tun_attr->src_ip.v6, &zerov6, sizeof(zerov6)))
			return 0;
	}
#endif
	/* Only set the flag if both src and dst ip addresses exist. They are
	 * required to establish routing.
	 */
	flow_flag_set(flow, TUN_RX);
	flow->attr->tun_ip_version = ip_version;
	return 0;
}

static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
{
	bool all_flow_encaps_valid = true;
	int i;

	/* Flow can be associated with multiple encap entries.
	 * Before offloading the flow verify that all of them have
	 * a valid neighbour.
	 */
	for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
			continue;
		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
			all_flow_encaps_valid = false;
			break;
		}
	}

	return all_flow_encaps_valid;
}

void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
			      struct mlx5e_encap_entry *e,
			      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_pkt_reformat_params reformat_params;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_attr *attr;
	struct mlx5_flow_spec *spec;
	struct mlx5e_tc_flow *flow;
	int err;

	if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
		return;

	memset(&reformat_params, 0, sizeof(reformat_params));
	reformat_params.type = e->reformat_type;
	reformat_params.size = e->encap_size;
	reformat_params.data = e->encap_header;
	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
						     &reformat_params,
						     MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(e->pkt_reformat)) {
		mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
			       PTR_ERR(e->pkt_reformat));
		return;
	}
	e->flags |= MLX5_ENCAP_ENTRY_VALID;
	mlx5e_rep_queue_neigh_stats_work(priv);

	list_for_each_entry(flow, flow_list, tmp_list) {
		if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW))
			continue;
		attr = flow->attr;
		esw_attr = attr->esw_attr;
		spec = &attr->parse_attr->spec;

		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
		esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;

		/* Do not offload flows with unresolved neighbors */
		if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
			continue;
		/* update from slow path rule to encap rule */
		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
				       err);
			continue;
		}

		mlx5e_tc_unoffload_from_slow_path(esw, flow);
		flow->rule[0] = rule;
		/* was unset when slow path rule removed */
		flow_flag_set(flow, OFFLOADED);
	}
}

void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
			      struct mlx5e_encap_entry *e,
			      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_attr *attr;
	struct mlx5_flow_spec *spec;
	struct mlx5e_tc_flow *flow;
	int err;

	list_for_each_entry(flow, flow_list, tmp_list) {
		if (!mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, SLOW))
			continue;
		attr = flow->attr;
		esw_attr = attr->esw_attr;
		spec = &attr->parse_attr->spec;

		/* update from encap rule to slow path rule */
		rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
		/* mark the flow's encap dest as non-valid */
		esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;

		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
				       err);
			continue;
		}

		mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
		flow->rule[0] = rule;
		/* was unset when fast path rule removed */
		flow_flag_set(flow, OFFLOADED);
	}

	/* we know that the encap is valid */
	e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
	mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
}
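
/* Take a temporary reference to the flow and add it to flow_list. The
 * reference keeps the flow alive while it is accessed from neigh and route
 * update handlers; waiting on init_done ensures any concurrent
 * initialization of the flow has finished before the flow is inspected.
 */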

static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
				struct list_head *flow_list,
				int index)
{
	if (IS_ERR(mlx5e_flow_get(flow)))
		return;
	wait_for_completion(&flow->init_done);

	flow->tmp_entry_index = index;
	list_add(&flow->tmp_list, flow_list);
}

/* Takes reference to all flows attached to encap and adds the flows to
 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
 */
void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
{
	struct encap_flow_item *efi;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(efi, &e->flows, list) {
		flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
		mlx5e_take_tmp_flow(flow, flow_list, efi->index);
	}
}

/* Takes reference to all flows attached to route and adds the flows to
 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
 */
static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
					     struct list_head *flow_list)
{
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, &r->decap_flows, decap_routes)
		mlx5e_take_tmp_flow(flow, flow_list, 0);
}
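
/* Walk nhe->encap_list under RCU until an entry that can be taken (non-zero
 * refcount) is found, then wait for its initialization to complete and check
 * it against 'match'; entries that fail the check are released and the walk
 * continues. The starting entry 'e' is always released, so callers can use
 * this as a cursor-style iterator.
 */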

typedef bool (match_cb)(struct mlx5e_encap_entry *);

static struct mlx5e_encap_entry *
mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
			      struct mlx5e_encap_entry *e,
			      match_cb match)
{
	struct mlx5e_encap_entry *next = NULL;

retry:
	rcu_read_lock();

	/* find encap with non-zero reference counter value */
	for (next = e ?
		     list_next_or_null_rcu(&nhe->encap_list,
					   &e->encap_list,
					   struct mlx5e_encap_entry,
					   encap_list) :
		     list_first_or_null_rcu(&nhe->encap_list,
					    struct mlx5e_encap_entry,
					    encap_list);
	     next;
	     next = list_next_or_null_rcu(&nhe->encap_list,
					  &next->encap_list,
					  struct mlx5e_encap_entry,
					  encap_list))
		if (mlx5e_encap_take(next))
			break;

	rcu_read_unlock();

	/* release starting encap */
	if (e)
		mlx5e_encap_put(netdev_priv(e->out_dev), e);
	if (!next)
		return next;

	/* wait for encap to be fully initialized */
	wait_for_completion(&next->res_ready);
	/* continue searching if encap entry is not in valid state after completion */
	if (!match(next)) {
		e = next;
		goto retry;
	}

	return next;
}

static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
{
	return e->flags & MLX5_ENCAP_ENTRY_VALID;
}

static struct mlx5e_encap_entry *
mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
			   struct mlx5e_encap_entry *e)
{
	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
}

static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
{
	return e->compl_result >= 0;
}

struct mlx5e_encap_entry *
mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
			  struct mlx5e_encap_entry *e)
{
	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
}

void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
	struct mlx5e_encap_entry *e = NULL;
	struct mlx5e_tc_flow *flow;
	struct mlx5_fc *counter;
	struct neigh_table *tbl;
	bool neigh_used = false;
	struct neighbour *n;
	u64 lastuse;

	if (m_neigh->family == AF_INET)
		tbl = &arp_tbl;
#if IS_ENABLED(CONFIG_IPV6)
	else if (m_neigh->family == AF_INET6)
		tbl = ipv6_stub->nd_tbl;
#endif
	else
		return;

	/* mlx5e_get_next_valid_encap() releases previous encap before returning
	 * next one.
	 */
	while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
		struct mlx5e_priv *priv = netdev_priv(e->out_dev);
		struct encap_flow_item *efi, *tmp;
		struct mlx5_eswitch *esw;
		LIST_HEAD(flow_list);

		esw = priv->mdev->priv.eswitch;
		mutex_lock(&esw->offloads.encap_tbl_lock);
		list_for_each_entry_safe(efi, tmp, &e->flows, list) {
			flow = container_of(efi, struct mlx5e_tc_flow,
					    encaps[efi->index]);
			if (IS_ERR(mlx5e_flow_get(flow)))
				continue;
			list_add(&flow->tmp_list, &flow_list);

			if (mlx5e_is_offloaded_flow(flow)) {
				counter = mlx5e_tc_get_counter(flow);
				lastuse = mlx5_fc_query_lastuse(counter);
				if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
					neigh_used = true;
					break;
				}
			}
		}
		mutex_unlock(&esw->offloads.encap_tbl_lock);

		mlx5e_put_flow_list(priv, &flow_list);
		if (neigh_used) {
			/* release current encap before breaking the loop */
			mlx5e_encap_put(priv, e);
			break;
		}
	}

	trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);

	if (neigh_used) {
		nhe->reported_lastuse = jiffies;

		/* find the relevant neigh according to the cached device and
		 * dst ip pair
		 */
		n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
		if (!n)
			return;

		neigh_event_send(n, NULL);
		neigh_release(n);
	}
}
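
/* Free an encap entry once its last reference is gone. compl_result > 0
 * means the entry was successfully attached to a representor and must be
 * detached first; MLX5_ENCAP_ENTRY_VALID means a HW packet reformat object
 * was allocated and must be released as well.
 */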

static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
	WARN_ON(!list_empty(&e->flows));

	if (e->compl_result > 0) {
		mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);

		if (e->flags & MLX5_ENCAP_ENTRY_VALID)
			mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
	}

	kfree(e->tun_info);
	kfree(e->encap_header);
	kfree_rcu(e, rcu);
}

static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
				struct mlx5e_decap_entry *d)
{
	WARN_ON(!list_empty(&d->flows));

	if (!d->compl_result)
		mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);

	kfree_rcu(d, rcu);
}

void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
		return;
	list_del(&e->route_list);
	hash_del_rcu(&e->encap_hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_encap_dealloc(priv, e);
}

static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
		return;
	hash_del_rcu(&d->hlist);
	mutex_unlock(&esw->offloads.decap_tbl_lock);

	mlx5e_decap_dealloc(priv, d);
}

static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
				     struct mlx5e_tc_flow *flow,
				     int out_index);

void mlx5e_detach_encap(struct mlx5e_priv *priv,
			struct mlx5e_tc_flow *flow, int out_index)
{
	struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (flow->attr->esw_attr->dests[out_index].flags &
	    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
		mlx5e_detach_encap_route(priv, flow, out_index);

	/* flow wasn't fully initialized */
	if (!e)
		return;

	mutex_lock(&esw->offloads.encap_tbl_lock);
	list_del(&flow->encaps[out_index].list);
	flow->encaps[out_index].e = NULL;
	if (!refcount_dec_and_test(&e->refcnt)) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		return;
	}
	list_del(&e->route_list);
	hash_del_rcu(&e->encap_hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_encap_dealloc(priv, e);
}

void mlx5e_detach_decap(struct mlx5e_priv *priv,
			struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_decap_entry *d = flow->decap_reformat;

	if (!d)
		return;

	mutex_lock(&esw->offloads.decap_tbl_lock);
	list_del(&flow->l3_to_l2_reformat);
	flow->decap_reformat = NULL;

	if (!refcount_dec_and_test(&d->refcnt)) {
		mutex_unlock(&esw->offloads.decap_tbl_lock);
		return;
	}
	hash_del_rcu(&d->hlist);
	mutex_unlock(&esw->offloads.decap_tbl_lock);

	mlx5e_decap_dealloc(priv, d);
}

bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
					   struct mlx5e_encap_key *b)
{
	return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 &&
		a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
}

static int cmp_decap_info(struct mlx5e_decap_key *a,
			  struct mlx5e_decap_key *b)
{
	return memcmp(&a->key, &b->key, sizeof(b->key));
}

static int hash_encap_info(struct mlx5e_encap_key *key)
{
	return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
		     key->tc_tunnel->tunnel_type);
}

static int hash_decap_info(struct mlx5e_decap_key *key)
{
	return jhash(&key->key, sizeof(key->key), 0);
}

bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
{
	return refcount_inc_not_zero(&e->refcnt);
}

static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
{
	return refcount_inc_not_zero(&e->refcnt);
}

static struct mlx5e_encap_entry *
mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key,
		uintptr_t hash_key)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_encap_key e_key;
	struct mlx5e_encap_entry *e;

	hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
				   encap_hlist, hash_key) {
		e_key.ip_tun_key = &e->tun_info->key;
		e_key.tc_tunnel = e->tunnel;
		if (e->tunnel->encap_info_equal(&e_key, key) &&
		    mlx5e_encap_take(e))
			return e;
	}

	return NULL;
}

static struct mlx5e_decap_entry *
mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
		uintptr_t hash_key)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_decap_key r_key;
	struct mlx5e_decap_entry *e;

	hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
				   hlist, hash_key) {
		r_key = e->key;
		if (!cmp_decap_info(&r_key, key) &&
		    mlx5e_decap_take(e))
			return e;
	}
	return NULL;
}

struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
{
	size_t tun_size = sizeof(*tun_info) + tun_info->options_len;

	return kmemdup(tun_info, tun_size, GFP_KERNEL);
}

static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
				      struct mlx5e_tc_flow *flow,
				      int out_index,
				      struct mlx5e_encap_entry *e,
				      struct netlink_ext_ack *extack)
{
	int i;

	for (i = 0; i < out_index; i++) {
		if (flow->encaps[i].e != e)
			continue;
		NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
		netdev_err(priv->netdev, "can't duplicate encap action\n");
		return true;
	}

	return false;
}

static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
			       struct mlx5_flow_attr *attr,
			       struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
			       struct net_device *out_dev,
			       int route_dev_ifindex,
			       int out_index)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
	struct net_device *route_dev;
	u16 vport_num;
	int err = 0;
	u32 data;

	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);

	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
		goto out;

	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
	if (err)
		goto out;

	attr->dest_chain = 0;
	attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
	esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
	data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
						       vport_num);
	err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
						   MLX5_FLOW_NAMESPACE_FDB,
						   VPORT_TO_REG, data);
	if (err >= 0) {
		esw_attr->dests[out_index].src_port_rewrite_act_id = err;
		err = 0;
	}

out:
	if (route_dev)
		dev_put(route_dev);
	return err;
}

static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
				  struct mlx5_esw_flow_attr *attr,
				  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
				  struct net_device *out_dev,
				  int route_dev_ifindex,
				  int out_index)
{
	int act_id = attr->dests[out_index].src_port_rewrite_act_id;
	struct net_device *route_dev;
	u16 vport_num;
	int err = 0;
	u32 data;

	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);

	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
		err = -ENODEV;
		goto out;
	}

	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
	if (err)
		goto out;

	data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
						       vport_num);
	mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);

out:
	if (route_dev)
		dev_put(route_dev);
	return err;
}

static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct mlx5e_tc_tun_encap *encap;
	unsigned int ret;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;
	encap = uplink_priv->encap;

	spin_lock_bh(&encap->route_lock);
	ret = encap->route_tbl_last_update;
	spin_unlock_bh(&encap->route_lock);
	return ret;
}

static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
				    struct mlx5e_tc_flow *flow,
				    struct mlx5e_encap_entry *e,
				    bool new_encap_entry,
				    unsigned long tbl_time_before,
				    int out_index);
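
/* Attach the flow to a (possibly shared) encap entry. Entries are keyed on
 * the tunnel info and cached in esw->offloads.encap_tbl; if another flow is
 * concurrently initializing the same entry, wait on its res_ready completion
 * instead of creating a duplicate. On success *encap_dev is set to the
 * egress device and *encap_valid reports whether the neighbour is resolved
 * and a HW reformat object already exists.
 */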

int mlx5e_attach_encap(struct mlx5e_priv *priv,
		       struct mlx5e_tc_flow *flow,
		       struct net_device *mirred_dev,
		       int out_index,
		       struct netlink_ext_ack *extack,
		       struct net_device **encap_dev,
		       bool *encap_valid)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	const struct ip_tunnel_info *tun_info;
	unsigned long tbl_time_before = 0;
	struct mlx5e_encap_entry *e;
	struct mlx5e_encap_key key;
	bool entry_created = false;
	unsigned short family;
	uintptr_t hash_key;
	int err = 0;

	parse_attr = attr->parse_attr;
	tun_info = parse_attr->tun_info[out_index];
	family = ip_tunnel_info_af(tun_info);
	key.ip_tun_key = &tun_info->key;
	key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
	if (!key.tc_tunnel) {
		NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
		return -EOPNOTSUPP;
	}

	hash_key = hash_encap_info(&key);

	mutex_lock(&esw->offloads.encap_tbl_lock);
	e = mlx5e_encap_get(priv, &key, hash_key);

	/* must verify if encap is valid or not */
	if (e) {
		/* Check that entry was not already attached to this flow */
		if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
			err = -EOPNOTSUPP;
			goto out_err;
		}

		mutex_unlock(&esw->offloads.encap_tbl_lock);
		wait_for_completion(&e->res_ready);

		/* Protect against concurrent neigh update. */
		mutex_lock(&esw->offloads.encap_tbl_lock);
		if (e->compl_result < 0) {
			err = -EREMOTEIO;
			goto out_err;
		}
		goto attach_flow;
	}

	e = kzalloc(sizeof(*e), GFP_KERNEL);
	if (!e) {
		err = -ENOMEM;
		goto out_err;
	}

	refcount_set(&e->refcnt, 1);
	init_completion(&e->res_ready);
	entry_created = true;
	INIT_LIST_HEAD(&e->route_list);

	tun_info = mlx5e_dup_tun_info(tun_info);
	if (!tun_info) {
		err = -ENOMEM;
		goto out_err_init;
	}
	e->tun_info = tun_info;
	err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
	if (err)
		goto out_err_init;

	INIT_LIST_HEAD(&e->flows);
	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	if (family == AF_INET)
		err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
	else if (family == AF_INET6)
		err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);

	/* Protect against concurrent neigh update. */
	mutex_lock(&esw->offloads.encap_tbl_lock);
	complete_all(&e->res_ready);
	if (err) {
		e->compl_result = err;
		goto out_err;
	}
	e->compl_result = 1;

attach_flow:
	err = mlx5e_attach_encap_route(priv, flow, e, entry_created, tbl_time_before,
				       out_index);
	if (err)
		goto out_err;

	err = mlx5e_set_int_port_tunnel(priv, attr, e, out_index);
	if (err == -EOPNOTSUPP) {
		/* If device doesn't support int port offload,
		 * redirect to uplink vport.
		 */
		mlx5_core_dbg(priv->mdev, "attaching int port as encap dev not supported, using uplink\n");
		err = 0;
	} else if (err) {
		goto out_err;
	}

	flow->encaps[out_index].e = e;
	list_add(&flow->encaps[out_index].list, &e->flows);
	flow->encaps[out_index].index = out_index;
	*encap_dev = e->out_dev;
	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
		attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
		attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
		*encap_valid = true;
	} else {
		*encap_valid = false;
	}
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	return err;

out_err:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	if (e)
		mlx5e_encap_put(priv, e);
	return err;

out_err_init:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	kfree(tun_info);
	kfree(e);
	return err;
}
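
/* Attach the flow to a (possibly shared) L3-to-L2 packet reformat entry
 * used to rebuild the inner ethernet header after tunnel decap. Entries are
 * keyed on the rebuilt header and cached in esw->offloads.decap_tbl.
 */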

int mlx5e_attach_decap(struct mlx5e_priv *priv,
		       struct mlx5e_tc_flow *flow,
		       struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
	struct mlx5_pkt_reformat_params reformat_params;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5e_decap_entry *d;
	struct mlx5e_decap_key key;
	uintptr_t hash_key;
	int err = 0;

	parse_attr = flow->attr->parse_attr;
	if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "encap header larger than max supported");
		return -EOPNOTSUPP;
	}

	key.key = parse_attr->eth;
	hash_key = hash_decap_info(&key);
	mutex_lock(&esw->offloads.decap_tbl_lock);
	d = mlx5e_decap_get(priv, &key, hash_key);
	if (d) {
		mutex_unlock(&esw->offloads.decap_tbl_lock);
		wait_for_completion(&d->res_ready);
		mutex_lock(&esw->offloads.decap_tbl_lock);
		if (d->compl_result) {
			err = -EREMOTEIO;
			goto out_free;
		}
		goto found;
	}

	d = kzalloc(sizeof(*d), GFP_KERNEL);
	if (!d) {
		err = -ENOMEM;
		goto out_err;
	}

	d->key = key;
	refcount_set(&d->refcnt, 1);
	init_completion(&d->res_ready);
	INIT_LIST_HEAD(&d->flows);
	hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
	mutex_unlock(&esw->offloads.decap_tbl_lock);

	memset(&reformat_params, 0, sizeof(reformat_params));
	reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
	reformat_params.size = sizeof(parse_attr->eth);
	reformat_params.data = &parse_attr->eth;
	d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
						     &reformat_params,
						     MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(d->pkt_reformat)) {
		err = PTR_ERR(d->pkt_reformat);
		d->compl_result = err;
	}
	mutex_lock(&esw->offloads.decap_tbl_lock);
	complete_all(&d->res_ready);
	if (err)
		goto out_free;

found:
	flow->decap_reformat = d;
	attr->decap_pkt_reformat = d->pkt_reformat;
	list_add(&flow->l3_to_l2_reformat, &d->flows);
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	return 0;

out_free:
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	mlx5e_decap_put(priv, d);
	return err;

out_err:
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	return err;
}

static int cmp_route_info(struct mlx5e_route_key *a,
			  struct mlx5e_route_key *b)
{
	if (a->ip_version == 4 && b->ip_version == 4)
		return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
			      sizeof(a->endpoint_ip.v4));
	else if (a->ip_version == 6 && b->ip_version == 6)
		return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
			      sizeof(a->endpoint_ip.v6));
	return 1;
}

static u32 hash_route_info(struct mlx5e_route_key *key)
{
	if (key->ip_version == 4)
		return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
	return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
}

static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
				struct mlx5e_route_entry *r)
{
	WARN_ON(!list_empty(&r->decap_flows));
	WARN_ON(!list_empty(&r->encap_entries));

	kfree_rcu(r, rcu);
}

static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
		return;

	hash_del_rcu(&r->hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_route_dealloc(priv, r);
}

static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	lockdep_assert_held(&esw->offloads.encap_tbl_lock);

	if (!refcount_dec_and_test(&r->refcnt))
		return;
	hash_del_rcu(&r->hlist);
	mlx5e_route_dealloc(priv, r);
}

static struct mlx5e_route_entry *
mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
		u32 hash_key)
{
	struct mlx5e_route_key r_key;
	struct mlx5e_route_entry *r;

	hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
		r_key = r->key;
		if (!cmp_route_info(&r_key, key) &&
		    refcount_inc_not_zero(&r->refcnt))
			return r;
	}
	return NULL;
}

static struct mlx5e_route_entry *
mlx5e_route_get_create(struct mlx5e_priv *priv,
		       struct mlx5e_route_key *key,
		       int tunnel_dev_index,
		       unsigned long *route_tbl_change_time)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct mlx5e_tc_tun_encap *encap;
	struct mlx5e_route_entry *r;
	u32 hash_key;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;
	encap = uplink_priv->encap;

	hash_key = hash_route_info(key);
	spin_lock_bh(&encap->route_lock);
	r = mlx5e_route_get(encap, key, hash_key);
	spin_unlock_bh(&encap->route_lock);
	if (r) {
		if (!mlx5e_route_entry_valid(r)) {
			mlx5e_route_put_locked(priv, r);
			return ERR_PTR(-EINVAL);
		}
		return r;
	}

	r = kzalloc(sizeof(*r), GFP_KERNEL);
	if (!r)
		return ERR_PTR(-ENOMEM);

	r->key = *key;
	r->flags |= MLX5E_ROUTE_ENTRY_VALID;
	r->tunnel_dev_index = tunnel_dev_index;
	refcount_set(&r->refcnt, 1);
	INIT_LIST_HEAD(&r->decap_flows);
	INIT_LIST_HEAD(&r->encap_entries);

	spin_lock_bh(&encap->route_lock);
	*route_tbl_change_time = encap->route_tbl_last_update;
	hash_add(encap->route_tbl, &r->hlist, hash_key);
	spin_unlock_bh(&encap->route_lock);

	return r;
}
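
/* Lookup a route entry on behalf of a FIB update. route_tbl_last_update is
 * bumped under route_lock so that concurrent mlx5e_attach_*_route() callers
 * comparing the timestamp before and after route entry creation can detect
 * a FIB event that raced with them and schedule an update.
 */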

static struct mlx5e_route_entry *
mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
{
	u32 hash_key = hash_route_info(key);
	struct mlx5e_route_entry *r;

	spin_lock_bh(&encap->route_lock);
	encap->route_tbl_last_update = jiffies;
	r = mlx5e_route_get(encap, key, hash_key);
	spin_unlock_bh(&encap->route_lock);

	return r;
}

struct mlx5e_tc_fib_event_data {
	struct work_struct work;
	unsigned long event;
	struct mlx5e_route_entry *r;
	struct net_device *ul_dev;
};

static void mlx5e_tc_fib_event_work(struct work_struct *work);
static struct mlx5e_tc_fib_event_data *
mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
{
	struct mlx5e_tc_fib_event_data *fib_work;

	fib_work = kzalloc(sizeof(*fib_work), flags);
	if (WARN_ON(!fib_work))
		return NULL;

	INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
	fib_work->event = event;
	fib_work->ul_dev = ul_dev;

	return fib_work;
}

static int
mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
			   struct mlx5e_route_entry *r,
			   unsigned long event)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_fib_event_data *fib_work;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct net_device *ul_dev;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	ul_dev = uplink_rpriv->netdev;

	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
	if (!fib_work)
		return -ENOMEM;

	dev_hold(ul_dev);
	refcount_inc(&r->refcnt);
	fib_work->r = r;
	queue_work(priv->wq, &fib_work->work);

	return 0;
}

int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
			     struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	unsigned long tbl_time_before, tbl_time_after;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	int err = 0;

	esw_attr = attr->esw_attr;
	parse_attr = attr->parse_attr;
	mutex_lock(&esw->offloads.encap_tbl_lock);
	if (!esw_attr->rx_tun_attr)
		goto out;

	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
	tbl_time_after = tbl_time_before;
	err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr, parse_attr->filter_dev);
	if (err || !esw_attr->rx_tun_attr->decap_vport)
		goto out;

	key.ip_version = attr->tun_ip_version;
	if (key.ip_version == 4)
		key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
	else
		key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;

	r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
				   &tbl_time_after);
	if (IS_ERR(r)) {
		err = PTR_ERR(r);
		goto out;
	}
	/* Routing changed concurrently. FIB event handler might have missed new
	 * entry, schedule update.
	 */
	if (tbl_time_before != tbl_time_after) {
		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
		if (err) {
			mlx5e_route_put_locked(priv, r);
			goto out;
		}
	}

	flow->decap_route = r;
	list_add(&flow->decap_routes, &r->decap_flows);
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	return 0;

out:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	return err;
}
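
/* Create or reuse a route entry for an encap flow. The route entry is only
 * tracked when the encapsulated traffic egresses through a VF tunnel and
 * source port rewrite is in effect (MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE);
 * otherwise the function returns without creating one.
 */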

static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
				    struct mlx5e_tc_flow *flow,
				    struct mlx5e_encap_entry *e,
				    bool new_encap_entry,
				    unsigned long tbl_time_before,
				    int out_index)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	unsigned long tbl_time_after = tbl_time_before;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	const struct ip_tunnel_info *tun_info;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	unsigned short family;
	int err = 0;

	esw_attr = attr->esw_attr;
	parse_attr = attr->parse_attr;
	tun_info = parse_attr->tun_info[out_index];
	family = ip_tunnel_info_af(tun_info);

	if (family == AF_INET) {
		key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
		key.ip_version = 4;
	} else if (family == AF_INET6) {
		key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
		key.ip_version = 6;
	}

	err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
				  e->route_dev_ifindex, out_index);
	if (err || !(esw_attr->dests[out_index].flags &
		     MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
		return err;

	r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
				   &tbl_time_after);
	if (IS_ERR(r))
		return PTR_ERR(r);
	/* Routing changed concurrently. FIB event handler might have missed new
	 * entry, schedule update.
	 */
	if (tbl_time_before != tbl_time_after) {
		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
		if (err) {
			mlx5e_route_put_locked(priv, r);
			return err;
		}
	}

	flow->encap_routes[out_index].r = r;
	if (new_encap_entry)
		list_add(&e->route_list, &r->encap_entries);
	flow->encap_routes[out_index].index = out_index;
	return 0;
}

void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
			      struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_route_entry *r = flow->decap_route;

	if (!r)
		return;

	mutex_lock(&esw->offloads.encap_tbl_lock);
	list_del(&flow->decap_routes);
	flow->decap_route = NULL;

	if (!refcount_dec_and_test(&r->refcnt)) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		return;
	}
	hash_del_rcu(&r->hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_route_dealloc(priv, r);
}

static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
				     struct mlx5e_tc_flow *flow,
				     int out_index)
{
	struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_encap_entry *e, *tmp;

	if (!r)
		return;

	mutex_lock(&esw->offloads.encap_tbl_lock);
	flow->encap_routes[out_index].r = NULL;

	if (!refcount_dec_and_test(&r->refcnt)) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		return;
	}
	list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
		list_del_init(&e->route_list);
	hash_del_rcu(&r->hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_route_dealloc(priv, r);
}

static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
				   struct mlx5e_encap_entry *e,
				   struct list_head *encap_flows)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, encap_flows, tmp_list) {
		struct mlx5_flow_attr *attr = flow->attr;
		struct mlx5_esw_flow_attr *esw_attr;

		if (!mlx5e_is_offloaded_flow(flow))
			continue;
		esw_attr = attr->esw_attr;

		if (flow_flag_test(flow, SLOW))
			mlx5e_tc_unoffload_from_slow_path(esw, flow);
		else
			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
		mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr);
		attr->modify_hdr = NULL;

		esw_attr->dests[flow->tmp_entry_index].flags &=
			~MLX5_ESW_DEST_ENCAP_VALID;
		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
	}

	e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
		e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
		mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
		e->pkt_reformat = NULL;
	}
}
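
/* Re-offload all flows of an encap entry after a route change: refresh the
 * tunnel headers, update the source port rewrite for the new route device,
 * then restore each flow either to the encap fast path (when all of the
 * flow's encap destinations have valid neighbours) or to the slow path.
 */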

static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
				  struct net_device *tunnel_dev,
				  struct mlx5e_encap_entry *e,
				  struct list_head *encap_flows)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;
	int err;

	err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
		mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
		mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
	if (err)
		mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err);
	e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;

	list_for_each_entry(flow, encap_flows, tmp_list) {
		struct mlx5e_tc_flow_parse_attr *parse_attr;
		struct mlx5_flow_attr *attr = flow->attr;
		struct mlx5_esw_flow_attr *esw_attr;
		struct mlx5_flow_handle *rule;
		struct mlx5_flow_spec *spec;

		if (flow_flag_test(flow, FAILED))
			continue;

		esw_attr = attr->esw_attr;
		parse_attr = attr->parse_attr;
		spec = &parse_attr->spec;

		err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
					     e->out_dev, e->route_dev_ifindex,
					     flow->tmp_entry_index);
		if (err) {
			mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err);
			continue;
		}

		err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow);
		if (err) {
			mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
				       err);
			continue;
		}

		if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
			esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
			esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
			if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
				goto offload_to_slow_path;
			/* update from slow path rule to encap rule */
			rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
			if (IS_ERR(rule)) {
				err = PTR_ERR(rule);
				mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
					       err);
			} else {
				flow->rule[0] = rule;
			}
		} else {
offload_to_slow_path:
			rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
			/* mark the flow's encap dest as non-valid */
			esw_attr->dests[flow->tmp_entry_index].flags &=
				~MLX5_ESW_DEST_ENCAP_VALID;

			if (IS_ERR(rule)) {
				err = PTR_ERR(rule);
				mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
					       err);
			} else {
				flow->rule[0] = rule;
			}
		}
		flow_flag_set(flow, OFFLOADED);
	}
}

static int mlx5e_update_route_encaps(struct mlx5e_priv *priv,
				     struct mlx5e_route_entry *r,
				     struct list_head *flow_list,
				     bool replace)
{
	struct net_device *tunnel_dev;
	struct mlx5e_encap_entry *e;

	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
	if (!tunnel_dev)
		return -ENODEV;

	list_for_each_entry(e, &r->encap_entries, route_list) {
		LIST_HEAD(encap_flows);

		mlx5e_take_all_encap_flows(e, &encap_flows);
		if (list_empty(&encap_flows))
			continue;

		if (mlx5e_route_entry_valid(r))
			mlx5e_invalidate_encap(priv, e, &encap_flows);

		if (!replace) {
			list_splice(&encap_flows, flow_list);
			continue;
		}

		mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows);
		list_splice(&encap_flows, flow_list);
	}

	return 0;
}

static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv,
				      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, flow_list, tmp_list)
		if (mlx5e_is_offloaded_flow(flow))
			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
}

static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
				  struct list_head *decap_flows)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, decap_flows, tmp_list) {
		struct mlx5e_tc_flow_parse_attr *parse_attr;
		struct mlx5_flow_attr *attr = flow->attr;
		struct mlx5_flow_handle *rule;
		struct mlx5_flow_spec *spec;
		int err;

		if (flow_flag_test(flow, FAILED))
			continue;

		parse_attr = attr->parse_attr;
		spec = &parse_attr->spec;
		err = mlx5e_tc_tun_route_lookup(priv, spec, attr, parse_attr->filter_dev);
		if (err) {
			mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
				       err);
			continue;
		}

		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
				       err);
		} else {
			flow->rule[0] = rule;
			flow_flag_set(flow, OFFLOADED);
		}
	}
}

static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
					  struct mlx5e_route_entry *r,
					  struct list_head *flow_list,
					  bool replace)
{
	struct net_device *tunnel_dev;
	LIST_HEAD(decap_flows);

	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
	if (!tunnel_dev)
		return -ENODEV;

	mlx5e_take_all_route_decap_flows(r, &decap_flows);
	if (mlx5e_route_entry_valid(r))
		mlx5e_unoffload_flow_list(priv, &decap_flows);
	if (replace)
		mlx5e_reoffload_decap(priv, &decap_flows);

	list_splice(&decap_flows, flow_list);

	return 0;
}

static void mlx5e_tc_fib_event_work(struct work_struct *work)
{
	struct mlx5e_tc_fib_event_data *event_data =
		container_of(work, struct mlx5e_tc_fib_event_data, work);
	struct net_device *ul_dev = event_data->ul_dev;
	struct mlx5e_priv *priv = netdev_priv(ul_dev);
	struct mlx5e_route_entry *r = event_data->r;
	struct mlx5_eswitch *esw;
	LIST_HEAD(flow_list);
	bool replace;
	int err;

	/* sync with concurrent neigh updates */
	rtnl_lock();
	esw = priv->mdev->priv.eswitch;
	mutex_lock(&esw->offloads.encap_tbl_lock);
	replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;

	if (!mlx5e_route_entry_valid(r) && !replace)
		goto out;

	err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
	if (err)
		mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
			       err);

	err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
	if (err)
		mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
			       err);

	if (replace)
		r->flags |= MLX5E_ROUTE_ENTRY_VALID;
out:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	rtnl_unlock();

	mlx5e_put_flow_list(priv, &flow_list);
	mlx5e_route_put(priv, event_data->r);
	dev_put(event_data->ul_dev);
	kfree(event_data);
}
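
/* The helpers below run from the FIB notifier in atomic context, hence the
 * GFP_ATOMIC allocations and the rule that nothing may fail after the route
 * lookup: releasing the route reference would require a sleeping mutex.
 * Only host routes (/32 for IPv4, /128 for IPv6) whose nexthop device is a
 * mlx5e netdev are of interest.
 */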

static struct mlx5e_tc_fib_event_data *
mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
			 struct net_device *ul_dev,
			 struct mlx5e_tc_tun_encap *encap,
			 unsigned long event,
			 struct fib_notifier_info *info)
{
	struct fib_entry_notifier_info *fen_info;
	struct mlx5e_tc_fib_event_data *fib_work;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	struct net_device *fib_dev;

	fen_info = container_of(info, struct fib_entry_notifier_info, info);
	fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
	if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
	    fen_info->dst_len != 32)
		return NULL;

	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
	if (!fib_work)
		return ERR_PTR(-ENOMEM);

	key.endpoint_ip.v4 = htonl(fen_info->dst);
	key.ip_version = 4;

	/* Can't fail after this point because releasing reference to r
	 * requires obtaining sleeping mutex which we can't do in atomic
	 * context.
	 */
	r = mlx5e_route_lookup_for_update(encap, &key);
	if (!r)
		goto out;
	fib_work->r = r;
	dev_hold(ul_dev);

	return fib_work;

out:
	kfree(fib_work);
	return NULL;
}

static struct mlx5e_tc_fib_event_data *
mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
			 struct net_device *ul_dev,
			 struct mlx5e_tc_tun_encap *encap,
			 unsigned long event,
			 struct fib_notifier_info *info)
{
	struct fib6_entry_notifier_info *fen_info;
	struct mlx5e_tc_fib_event_data *fib_work;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	struct net_device *fib_dev;

	fen_info = container_of(info, struct fib6_entry_notifier_info, info);
	fib_dev = fib6_info_nh_dev(fen_info->rt);
	if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
	    fen_info->rt->fib6_dst.plen != 128)
		return NULL;

	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
	if (!fib_work)
		return ERR_PTR(-ENOMEM);

	memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
	       sizeof(fen_info->rt->fib6_dst.addr));
	key.ip_version = 6;

	/* Can't fail after this point because releasing reference to r
	 * requires obtaining sleeping mutex which we can't do in atomic
	 * context.
	 */
	r = mlx5e_route_lookup_for_update(encap, &key);
	if (!r)
		goto out;
	fib_work->r = r;
	dev_hold(ul_dev);

	return fib_work;

out:
	kfree(fib_work);
	return NULL;
}
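
/* Top-level FIB notifier callback: route replace and delete events for IPv4
 * and IPv6 are translated into work items handled by
 * mlx5e_tc_fib_event_work(); all other events are ignored.
 */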

static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
{
	struct mlx5e_tc_fib_event_data *fib_work;
	struct fib_notifier_info *info = ptr;
	struct mlx5e_tc_tun_encap *encap;
	struct net_device *ul_dev;
	struct mlx5e_priv *priv;

	encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
	priv = encap->priv;
	ul_dev = priv->netdev;
	priv = netdev_priv(ul_dev);

	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		if (info->family == AF_INET)
			fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
		else if (info->family == AF_INET6)
			fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
		else
			return NOTIFY_DONE;

		if (!IS_ERR_OR_NULL(fib_work)) {
			queue_work(priv->wq, &fib_work->work);
		} else if (IS_ERR(fib_work)) {
			NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
			mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
				       PTR_ERR(fib_work));
		}

		break;
	default:
		return NOTIFY_DONE;
	}

	return NOTIFY_DONE;
}

struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_tun_encap *encap;
	int err;

	encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
	if (!encap)
		return ERR_PTR(-ENOMEM);

	encap->priv = priv;
	encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
	spin_lock_init(&encap->route_lock);
	hash_init(encap->route_tbl);
	err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
				    NULL, NULL);
	if (err) {
		kvfree(encap);
		return ERR_PTR(err);
	}

	return encap;
}

void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
{
	if (!encap)
		return;

	unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
	flush_workqueue(encap->priv->wq); /* flush fib event works */
	kvfree(encap);
}