// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021 Mellanox Technologies. */

#include <net/fib_notifier.h>
#include <net/nexthop.h>
#include "tc_tun_encap.h"
#include "en_tc.h"
#include "tc_tun.h"
#include "rep/tc.h"
#include "diag/en_tc_tracepoint.h"

enum {
	MLX5E_ROUTE_ENTRY_VALID = BIT(0),
};

struct mlx5e_route_key {
	int ip_version;
	union {
		__be32 v4;
		struct in6_addr v6;
	} endpoint_ip;
};

struct mlx5e_route_entry {
	struct mlx5e_route_key key;
	struct list_head encap_entries;
	struct list_head decap_flows;
	u32 flags;
	struct hlist_node hlist;
	refcount_t refcnt;
	int tunnel_dev_index;
	struct rcu_head rcu;
};

struct mlx5e_tc_tun_encap {
	struct mlx5e_priv *priv;
	struct notifier_block fib_nb;
	spinlock_t route_lock; /* protects route_tbl */
	unsigned long route_tbl_last_update;
	DECLARE_HASHTABLE(route_tbl, 8);
};

static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
{
	return r->flags & MLX5E_ROUTE_ENTRY_VALID;
}

int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
			     struct mlx5_flow_spec *spec)
{
	struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
	struct mlx5_rx_tun_attr *tun_attr;
	void *daddr, *saddr;
	u8 ip_version;

	tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
	if (!tun_attr)
		return -ENOMEM;

	esw_attr->rx_tun_attr = tun_attr;
	ip_version = mlx5e_tc_get_ip_version(spec, true);

	if (ip_version == 4) {
		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
		tun_attr->dst_ip.v4 = *(__be32 *)daddr;
		tun_attr->src_ip.v4 = *(__be32 *)saddr;
		if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
			return 0;
	}
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
	else if (ip_version == 6) {
		int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
		struct in6_addr zerov6 = {};

		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
		memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
		memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
		if (!memcmp(&tun_attr->dst_ip.v6, &zerov6, sizeof(zerov6)) ||
		    !memcmp(&tun_attr->src_ip.v6, &zerov6, sizeof(zerov6)))
			return 0;
	}
#endif
	/* Only set the flag if both src and dst ip addresses exist. They are
	 * required to establish routing.
	 */
	flow_flag_set(flow, TUN_RX);
	flow->attr->tun_ip_version = ip_version;
	return 0;
}

static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
{
	bool all_flow_encaps_valid = true;
	int i;

	/* Flow can be associated with multiple encap entries.
	 * Before offloading the flow verify that all of them have
	 * a valid neighbour.
	 */
	for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
			continue;
		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
			all_flow_encaps_valid = false;
			break;
		}
	}

	return all_flow_encaps_valid;
}
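
/* Update all flows on @flow_list after their shared encap entry became
 * valid: allocate the HW reformat context for the cached encap header and
 * try to replace each flow's slow path rule with an offloaded encap rule.
 */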
void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
			      struct mlx5e_encap_entry *e,
			      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_pkt_reformat_params reformat_params;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_attr *attr;
	struct mlx5_flow_spec *spec;
	struct mlx5e_tc_flow *flow;
	int err;

	if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
		return;

	memset(&reformat_params, 0, sizeof(reformat_params));
	reformat_params.type = e->reformat_type;
	reformat_params.size = e->encap_size;
	reformat_params.data = e->encap_header;
	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
						     &reformat_params,
						     MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(e->pkt_reformat)) {
		mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
			       PTR_ERR(e->pkt_reformat));
		return;
	}
	e->flags |= MLX5_ENCAP_ENTRY_VALID;
	mlx5e_rep_queue_neigh_stats_work(priv);

	list_for_each_entry(flow, flow_list, tmp_list) {
		if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW))
			continue;
		attr = flow->attr;
		esw_attr = attr->esw_attr;
		spec = &attr->parse_attr->spec;

		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
		esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;

		/* Do not offload flows with unresolved neighbors */
		if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
			continue;
		/* update from slow path rule to encap rule */
		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
				       err);
			continue;
		}

		mlx5e_tc_unoffload_from_slow_path(esw, flow);
		flow->rule[0] = rule;
		/* was unset when slow path rule removed */
		flow_flag_set(flow, OFFLOADED);
	}
}

void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
			      struct mlx5e_encap_entry *e,
			      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_attr *attr;
	struct mlx5_flow_spec *spec;
	struct mlx5e_tc_flow *flow;
	int err;

	list_for_each_entry(flow, flow_list, tmp_list) {
		if (!mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, SLOW))
			continue;
		attr = flow->attr;
		esw_attr = attr->esw_attr;
		spec = &attr->parse_attr->spec;

		/* update from encap rule to slow path rule */
		rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
		/* mark the flow's encap dest as non-valid */
		esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;

		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
				       err);
			continue;
		}

		mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
		flow->rule[0] = rule;
		/* was unset when fast path rule removed */
		flow_flag_set(flow, OFFLOADED);
	}

	/* we know that the encap is valid */
	e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
	mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
}
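
/* Take a temporary reference to @flow and add it to @flow_list; flows that
 * are concurrently being freed are skipped, and flow initialization is
 * awaited so the flow is in a consistent state when processed.
 */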
static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
				struct list_head *flow_list,
				int index)
{
	if (IS_ERR(mlx5e_flow_get(flow)))
		return;
	wait_for_completion(&flow->init_done);

	flow->tmp_entry_index = index;
	list_add(&flow->tmp_list, flow_list);
}

/* Takes reference to all flows attached to encap and adds the flows to
 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
 */
void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
{
	struct encap_flow_item *efi;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(efi, &e->flows, list) {
		flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
		mlx5e_take_tmp_flow(flow, flow_list, efi->index);
	}
}

/* Takes reference to all flows attached to route and adds the flows to
 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
 */
static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
					     struct list_head *flow_list)
{
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, &r->decap_flows, decap_routes)
		mlx5e_take_tmp_flow(flow, flow_list, 0);
}

typedef bool (match_cb)(struct mlx5e_encap_entry *);

static struct mlx5e_encap_entry *
mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
			      struct mlx5e_encap_entry *e,
			      match_cb match)
{
	struct mlx5e_encap_entry *next = NULL;

retry:
	rcu_read_lock();

	/* find encap with non-zero reference counter value */
	for (next = e ?
		     list_next_or_null_rcu(&nhe->encap_list,
					   &e->encap_list,
					   struct mlx5e_encap_entry,
					   encap_list) :
		     list_first_or_null_rcu(&nhe->encap_list,
					    struct mlx5e_encap_entry,
					    encap_list);
	     next;
	     next = list_next_or_null_rcu(&nhe->encap_list,
					  &next->encap_list,
					  struct mlx5e_encap_entry,
					  encap_list))
		if (mlx5e_encap_take(next))
			break;

	rcu_read_unlock();

	/* release starting encap */
	if (e)
		mlx5e_encap_put(netdev_priv(e->out_dev), e);
	if (!next)
		return next;

	/* wait for encap to be fully initialized */
	wait_for_completion(&next->res_ready);
	/* continue searching if encap entry is not in valid state after completion */
	if (!match(next)) {
		e = next;
		goto retry;
	}

	return next;
}

static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
{
	return e->flags & MLX5_ENCAP_ENTRY_VALID;
}

static struct mlx5e_encap_entry *
mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
			   struct mlx5e_encap_entry *e)
{
	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
}

static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
{
	return e->compl_result >= 0;
}

struct mlx5e_encap_entry *
mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
			  struct mlx5e_encap_entry *e)
{
	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
}
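
/* Poll the flow counters of all valid encaps on the neigh hash entry to
 * detect whether any offloaded flow used the neighbour since the last
 * report; if so, kick the neighbour so it is not considered stale while
 * offloaded traffic still resolves through it.
 */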
void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
	struct mlx5e_encap_entry *e = NULL;
	struct mlx5e_tc_flow *flow;
	struct mlx5_fc *counter;
	struct neigh_table *tbl;
	bool neigh_used = false;
	struct neighbour *n;
	u64 lastuse;

	if (m_neigh->family == AF_INET)
		tbl = &arp_tbl;
#if IS_ENABLED(CONFIG_IPV6)
	else if (m_neigh->family == AF_INET6)
		tbl = ipv6_stub->nd_tbl;
#endif
	else
		return;

	/* mlx5e_get_next_valid_encap() releases previous encap before returning
	 * next one.
	 */
	while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
		struct mlx5e_priv *priv = netdev_priv(e->out_dev);
		struct encap_flow_item *efi, *tmp;
		struct mlx5_eswitch *esw;
		LIST_HEAD(flow_list);

		esw = priv->mdev->priv.eswitch;
		mutex_lock(&esw->offloads.encap_tbl_lock);
		list_for_each_entry_safe(efi, tmp, &e->flows, list) {
			flow = container_of(efi, struct mlx5e_tc_flow,
					    encaps[efi->index]);
			if (IS_ERR(mlx5e_flow_get(flow)))
				continue;
			list_add(&flow->tmp_list, &flow_list);

			if (mlx5e_is_offloaded_flow(flow)) {
				counter = mlx5e_tc_get_counter(flow);
				lastuse = mlx5_fc_query_lastuse(counter);
				if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
					neigh_used = true;
					break;
				}
			}
		}
		mutex_unlock(&esw->offloads.encap_tbl_lock);

		mlx5e_put_flow_list(priv, &flow_list);
		if (neigh_used) {
			/* release current encap before breaking the loop */
			mlx5e_encap_put(priv, e);
			break;
		}
	}

	trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);

	if (neigh_used) {
		nhe->reported_lastuse = jiffies;

		/* find the relevant neigh according to the cached device and
		 * dst ip pair
		 */
		n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
		if (!n)
			return;

		neigh_event_send(n, NULL);
		neigh_release(n);
	}
}

static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
	WARN_ON(!list_empty(&e->flows));

	if (e->compl_result > 0) {
		mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);

		if (e->flags & MLX5_ENCAP_ENTRY_VALID)
			mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
	}

	kfree(e->tun_info);
	kfree(e->encap_header);
	kfree_rcu(e, rcu);
}

static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
				struct mlx5e_decap_entry *d)
{
	WARN_ON(!list_empty(&d->flows));

	if (!d->compl_result)
		mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);

	kfree_rcu(d, rcu);
}

void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
		return;
	list_del(&e->route_list);
	hash_del_rcu(&e->encap_hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_encap_dealloc(priv, e);
}

static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
		return;
	hash_del_rcu(&d->hlist);
	mutex_unlock(&esw->offloads.decap_tbl_lock);

	mlx5e_decap_dealloc(priv, d);
}

static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
				     struct mlx5e_tc_flow *flow,
				     int out_index);
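
/* Drop the flow's reference on its encap entry at @out_index, releasing the
 * route binding (if any) first; the entry itself is freed once the last
 * reference is gone.
 */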
void mlx5e_detach_encap(struct mlx5e_priv *priv,
			struct mlx5e_tc_flow *flow, int out_index)
{
	struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (flow->attr->esw_attr->dests[out_index].flags &
	    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
		mlx5e_detach_encap_route(priv, flow, out_index);

	/* flow wasn't fully initialized */
	if (!e)
		return;

	mutex_lock(&esw->offloads.encap_tbl_lock);
	list_del(&flow->encaps[out_index].list);
	flow->encaps[out_index].e = NULL;
	if (!refcount_dec_and_test(&e->refcnt)) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		return;
	}
	list_del(&e->route_list);
	hash_del_rcu(&e->encap_hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_encap_dealloc(priv, e);
}

void mlx5e_detach_decap(struct mlx5e_priv *priv,
			struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_decap_entry *d = flow->decap_reformat;

	if (!d)
		return;

	mutex_lock(&esw->offloads.decap_tbl_lock);
	list_del(&flow->l3_to_l2_reformat);
	flow->decap_reformat = NULL;

	if (!refcount_dec_and_test(&d->refcnt)) {
		mutex_unlock(&esw->offloads.decap_tbl_lock);
		return;
	}
	hash_del_rcu(&d->hlist);
	mutex_unlock(&esw->offloads.decap_tbl_lock);

	mlx5e_decap_dealloc(priv, d);
}

bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
					   struct mlx5e_encap_key *b)
{
	return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 &&
		a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
}

static int cmp_decap_info(struct mlx5e_decap_key *a,
			  struct mlx5e_decap_key *b)
{
	return memcmp(&a->key, &b->key, sizeof(b->key));
}

static int hash_encap_info(struct mlx5e_encap_key *key)
{
	return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
		     key->tc_tunnel->tunnel_type);
}

static int hash_decap_info(struct mlx5e_decap_key *key)
{
	return jhash(&key->key, sizeof(key->key), 0);
}

bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
{
	return refcount_inc_not_zero(&e->refcnt);
}

static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
{
	return refcount_inc_not_zero(&e->refcnt);
}

static struct mlx5e_encap_entry *
mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key,
		uintptr_t hash_key)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_encap_key e_key;
	struct mlx5e_encap_entry *e;

	hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
				   encap_hlist, hash_key) {
		e_key.ip_tun_key = &e->tun_info->key;
		e_key.tc_tunnel = e->tunnel;
		if (e->tunnel->encap_info_equal(&e_key, key) &&
		    mlx5e_encap_take(e))
			return e;
	}

	return NULL;
}

static struct mlx5e_decap_entry *
mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
		uintptr_t hash_key)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_decap_key r_key;
	struct mlx5e_decap_entry *e;

	hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
				   hlist, hash_key) {
		r_key = e->key;
		if (!cmp_decap_info(&r_key, key) &&
		    mlx5e_decap_take(e))
			return e;
	}
	return NULL;
}

struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
{
	size_t tun_size = sizeof(*tun_info) + tun_info->options_len;

	return kmemdup(tun_info, tun_size, GFP_KERNEL);
}
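
/* A single flow must not reference the same encap entry from two different
 * destinations; reject such duplicates.
 */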
static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
				      struct mlx5e_tc_flow *flow,
				      int out_index,
				      struct mlx5e_encap_entry *e,
				      struct netlink_ext_ack *extack)
{
	int i;

	for (i = 0; i < out_index; i++) {
		if (flow->encaps[i].e != e)
			continue;
		NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
		netdev_err(priv->netdev, "can't duplicate encap action\n");
		return true;
	}

	return false;
}

static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
			       struct mlx5_flow_attr *attr,
			       struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
			       struct net_device *out_dev,
			       int route_dev_ifindex,
			       int out_index)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
	struct net_device *route_dev;
	u16 vport_num;
	int err = 0;
	u32 data;

	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);

	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
		goto out;

	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
	if (err)
		goto out;

	attr->dest_chain = 0;
	attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
	esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
	data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
						       vport_num);
	err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
						   MLX5_FLOW_NAMESPACE_FDB,
						   VPORT_TO_REG, data);
	if (err >= 0) {
		esw_attr->dests[out_index].src_port_rewrite_act_id = err;
		err = 0;
	}

out:
	if (route_dev)
		dev_put(route_dev);
	return err;
}

static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
				  struct mlx5_esw_flow_attr *attr,
				  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
				  struct net_device *out_dev,
				  int route_dev_ifindex,
				  int out_index)
{
	int act_id = attr->dests[out_index].src_port_rewrite_act_id;
	struct net_device *route_dev;
	u16 vport_num;
	int err = 0;
	u32 data;

	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);

	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
		err = -ENODEV;
		goto out;
	}

	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
	if (err)
		goto out;

	data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
						       vport_num);
	mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);

out:
	if (route_dev)
		dev_put(route_dev);
	return err;
}

static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct mlx5e_tc_tun_encap *encap;
	unsigned int ret;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;
	encap = uplink_priv->encap;

	spin_lock_bh(&encap->route_lock);
	ret = encap->route_tbl_last_update;
	spin_unlock_bh(&encap->route_lock);
	return ret;
}

static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
				    struct mlx5e_tc_flow *flow,
				    struct mlx5e_encap_entry *e,
				    bool new_encap_entry,
				    unsigned long tbl_time_before,
				    int out_index);
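
/* Find or create the encap entry matching the flow's tunnel key, wait for
 * its header to be resolved and attach the flow to it. On return,
 * *encap_dev is the tunnel egress device and *encap_valid tells whether the
 * encap destination can be offloaded immediately or the flow must start on
 * the slow path.
 */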
int mlx5e_attach_encap(struct mlx5e_priv *priv,
		       struct mlx5e_tc_flow *flow,
		       struct net_device *mirred_dev,
		       int out_index,
		       struct netlink_ext_ack *extack,
		       struct net_device **encap_dev,
		       bool *encap_valid)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	const struct ip_tunnel_info *tun_info;
	unsigned long tbl_time_before = 0;
	struct mlx5e_encap_entry *e;
	struct mlx5e_encap_key key;
	bool entry_created = false;
	unsigned short family;
	uintptr_t hash_key;
	int err = 0;

	parse_attr = attr->parse_attr;
	tun_info = parse_attr->tun_info[out_index];
	family = ip_tunnel_info_af(tun_info);
	key.ip_tun_key = &tun_info->key;
	key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
	if (!key.tc_tunnel) {
		NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
		return -EOPNOTSUPP;
	}

	hash_key = hash_encap_info(&key);

	mutex_lock(&esw->offloads.encap_tbl_lock);
	e = mlx5e_encap_get(priv, &key, hash_key);

	/* must verify if encap is valid or not */
	if (e) {
		/* Check that entry was not already attached to this flow */
		if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
			err = -EOPNOTSUPP;
			goto out_err;
		}

		mutex_unlock(&esw->offloads.encap_tbl_lock);
		wait_for_completion(&e->res_ready);

		/* Protect against concurrent neigh update. */
		mutex_lock(&esw->offloads.encap_tbl_lock);
		if (e->compl_result < 0) {
			err = -EREMOTEIO;
			goto out_err;
		}
		goto attach_flow;
	}

	e = kzalloc(sizeof(*e), GFP_KERNEL);
	if (!e) {
		err = -ENOMEM;
		goto out_err;
	}

	refcount_set(&e->refcnt, 1);
	init_completion(&e->res_ready);
	entry_created = true;
	INIT_LIST_HEAD(&e->route_list);

	tun_info = mlx5e_dup_tun_info(tun_info);
	if (!tun_info) {
		err = -ENOMEM;
		goto out_err_init;
	}
	e->tun_info = tun_info;
	err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
	if (err)
		goto out_err_init;

	INIT_LIST_HEAD(&e->flows);
	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	if (family == AF_INET)
		err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
	else if (family == AF_INET6)
		err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);

	/* Protect against concurrent neigh update. */
	mutex_lock(&esw->offloads.encap_tbl_lock);
	complete_all(&e->res_ready);
	if (err) {
		e->compl_result = err;
		goto out_err;
	}
	e->compl_result = 1;

attach_flow:
	err = mlx5e_attach_encap_route(priv, flow, e, entry_created, tbl_time_before,
				       out_index);
	if (err)
		goto out_err;

	flow->encaps[out_index].e = e;
	list_add(&flow->encaps[out_index].list, &e->flows);
	flow->encaps[out_index].index = out_index;
	*encap_dev = e->out_dev;
	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
		attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
		attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
		*encap_valid = true;
	} else {
		*encap_valid = false;
	}
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	return err;

out_err:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	if (e)
		mlx5e_encap_put(priv, e);
	return err;

out_err_init:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	kfree(tun_info);
	kfree(e);
	return err;
}
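
/* Find or create the shared L3-to-L2 reformat context that rebuilds the
 * inner Ethernet header of decapsulated packets; entries are deduplicated
 * by the rebuilt header contents.
 */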
int mlx5e_attach_decap(struct mlx5e_priv *priv,
		       struct mlx5e_tc_flow *flow,
		       struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
	struct mlx5_pkt_reformat_params reformat_params;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5e_decap_entry *d;
	struct mlx5e_decap_key key;
	uintptr_t hash_key;
	int err = 0;

	parse_attr = flow->attr->parse_attr;
	if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "encap header larger than max supported");
		return -EOPNOTSUPP;
	}

	key.key = parse_attr->eth;
	hash_key = hash_decap_info(&key);
	mutex_lock(&esw->offloads.decap_tbl_lock);
	d = mlx5e_decap_get(priv, &key, hash_key);
	if (d) {
		mutex_unlock(&esw->offloads.decap_tbl_lock);
		wait_for_completion(&d->res_ready);
		mutex_lock(&esw->offloads.decap_tbl_lock);
		if (d->compl_result) {
			err = -EREMOTEIO;
			goto out_free;
		}
		goto found;
	}

	d = kzalloc(sizeof(*d), GFP_KERNEL);
	if (!d) {
		err = -ENOMEM;
		goto out_err;
	}

	d->key = key;
	refcount_set(&d->refcnt, 1);
	init_completion(&d->res_ready);
	INIT_LIST_HEAD(&d->flows);
	hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
	mutex_unlock(&esw->offloads.decap_tbl_lock);

	memset(&reformat_params, 0, sizeof(reformat_params));
	reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
	reformat_params.size = sizeof(parse_attr->eth);
	reformat_params.data = &parse_attr->eth;
	d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
						     &reformat_params,
						     MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(d->pkt_reformat)) {
		err = PTR_ERR(d->pkt_reformat);
		d->compl_result = err;
	}
	mutex_lock(&esw->offloads.decap_tbl_lock);
	complete_all(&d->res_ready);
	if (err)
		goto out_free;

found:
	flow->decap_reformat = d;
	attr->decap_pkt_reformat = d->pkt_reformat;
	list_add(&flow->l3_to_l2_reformat, &d->flows);
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	return 0;

out_free:
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	mlx5e_decap_put(priv, d);
	return err;

out_err:
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	return err;
}
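
/* Route entries map a tunnel endpoint IP to the decap flows and encap
 * entries that depend on it, so FIB events can revalidate them when routing
 * changes.
 */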
static int cmp_route_info(struct mlx5e_route_key *a,
			  struct mlx5e_route_key *b)
{
	if (a->ip_version == 4 && b->ip_version == 4)
		return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
			      sizeof(a->endpoint_ip.v4));
	else if (a->ip_version == 6 && b->ip_version == 6)
		return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
			      sizeof(a->endpoint_ip.v6));
	return 1;
}

static u32 hash_route_info(struct mlx5e_route_key *key)
{
	if (key->ip_version == 4)
		return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
	return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
}

static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
				struct mlx5e_route_entry *r)
{
	WARN_ON(!list_empty(&r->decap_flows));
	WARN_ON(!list_empty(&r->encap_entries));

	kfree_rcu(r, rcu);
}

static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
		return;

	hash_del_rcu(&r->hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_route_dealloc(priv, r);
}

static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	lockdep_assert_held(&esw->offloads.encap_tbl_lock);

	if (!refcount_dec_and_test(&r->refcnt))
		return;
	hash_del_rcu(&r->hlist);
	mlx5e_route_dealloc(priv, r);
}

static struct mlx5e_route_entry *
mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
		u32 hash_key)
{
	struct mlx5e_route_key r_key;
	struct mlx5e_route_entry *r;

	hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
		r_key = r->key;
		if (!cmp_route_info(&r_key, key) &&
		    refcount_inc_not_zero(&r->refcnt))
			return r;
	}
	return NULL;
}

static struct mlx5e_route_entry *
mlx5e_route_get_create(struct mlx5e_priv *priv,
		       struct mlx5e_route_key *key,
		       int tunnel_dev_index,
		       unsigned long *route_tbl_change_time)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct mlx5e_tc_tun_encap *encap;
	struct mlx5e_route_entry *r;
	u32 hash_key;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;
	encap = uplink_priv->encap;

	hash_key = hash_route_info(key);
	spin_lock_bh(&encap->route_lock);
	r = mlx5e_route_get(encap, key, hash_key);
	spin_unlock_bh(&encap->route_lock);
	if (r) {
		if (!mlx5e_route_entry_valid(r)) {
			mlx5e_route_put_locked(priv, r);
			return ERR_PTR(-EINVAL);
		}
		return r;
	}

	r = kzalloc(sizeof(*r), GFP_KERNEL);
	if (!r)
		return ERR_PTR(-ENOMEM);

	r->key = *key;
	r->flags |= MLX5E_ROUTE_ENTRY_VALID;
	r->tunnel_dev_index = tunnel_dev_index;
	refcount_set(&r->refcnt, 1);
	INIT_LIST_HEAD(&r->decap_flows);
	INIT_LIST_HEAD(&r->encap_entries);

	spin_lock_bh(&encap->route_lock);
	*route_tbl_change_time = encap->route_tbl_last_update;
	hash_add(encap->route_tbl, &r->hlist, hash_key);
	spin_unlock_bh(&encap->route_lock);

	return r;
}
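
/* Called in FIB event handler context: advance route_tbl_last_update under
 * the route lock so concurrent mlx5e_route_get_create() callers can detect
 * a possibly missed update, then look up the affected entry.
 */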
static struct mlx5e_route_entry *
mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
{
	u32 hash_key = hash_route_info(key);
	struct mlx5e_route_entry *r;

	spin_lock_bh(&encap->route_lock);
	encap->route_tbl_last_update = jiffies;
	r = mlx5e_route_get(encap, key, hash_key);
	spin_unlock_bh(&encap->route_lock);

	return r;
}

struct mlx5e_tc_fib_event_data {
	struct work_struct work;
	unsigned long event;
	struct mlx5e_route_entry *r;
	struct net_device *ul_dev;
};

static void mlx5e_tc_fib_event_work(struct work_struct *work);
static struct mlx5e_tc_fib_event_data *
mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
{
	struct mlx5e_tc_fib_event_data *fib_work;

	fib_work = kzalloc(sizeof(*fib_work), flags);
	if (WARN_ON(!fib_work))
		return NULL;

	INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
	fib_work->event = event;
	fib_work->ul_dev = ul_dev;

	return fib_work;
}

static int
mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
			   struct mlx5e_route_entry *r,
			   unsigned long event)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_fib_event_data *fib_work;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct net_device *ul_dev;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	ul_dev = uplink_rpriv->netdev;

	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
	if (!fib_work)
		return -ENOMEM;

	dev_hold(ul_dev);
	refcount_inc(&r->refcnt);
	fib_work->r = r;
	queue_work(priv->wq, &fib_work->work);

	return 0;
}
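
/* Bind a decap flow to the route entry of its tunnel destination IP; if the
 * route table changed while the entry was being created, schedule an update
 * so that no FIB event is lost.
 */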
int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
			     struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	unsigned long tbl_time_before, tbl_time_after;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	int err = 0;

	esw_attr = attr->esw_attr;
	parse_attr = attr->parse_attr;
	mutex_lock(&esw->offloads.encap_tbl_lock);
	if (!esw_attr->rx_tun_attr)
		goto out;

	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
	tbl_time_after = tbl_time_before;
	err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr);
	if (err || !esw_attr->rx_tun_attr->decap_vport)
		goto out;

	key.ip_version = attr->tun_ip_version;
	if (key.ip_version == 4)
		key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
	else
		key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;

	r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
				   &tbl_time_after);
	if (IS_ERR(r)) {
		err = PTR_ERR(r);
		goto out;
	}
	/* Routing changed concurrently. FIB event handler might have missed new
	 * entry, schedule update.
	 */
	if (tbl_time_before != tbl_time_after) {
		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
		if (err) {
			mlx5e_route_put_locked(priv, r);
			goto out;
		}
	}

	flow->decap_route = r;
	list_add(&flow->decap_routes, &r->decap_flows);
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	return 0;

out:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	return err;
}
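
/* Encap counterpart of mlx5e_attach_decap_route(): only flows that take the
 * VF tunnel (source port rewrite) path need route tracking, keyed on the
 * tunnel source IP.
 */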
static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
				    struct mlx5e_tc_flow *flow,
				    struct mlx5e_encap_entry *e,
				    bool new_encap_entry,
				    unsigned long tbl_time_before,
				    int out_index)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	unsigned long tbl_time_after = tbl_time_before;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	const struct ip_tunnel_info *tun_info;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	unsigned short family;
	int err = 0;

	esw_attr = attr->esw_attr;
	parse_attr = attr->parse_attr;
	tun_info = parse_attr->tun_info[out_index];
	family = ip_tunnel_info_af(tun_info);

	if (family == AF_INET) {
		key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
		key.ip_version = 4;
	} else if (family == AF_INET6) {
		key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
		key.ip_version = 6;
	}

	err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
				  e->route_dev_ifindex, out_index);
	if (err || !(esw_attr->dests[out_index].flags &
		     MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
		return err;

	r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
				   &tbl_time_after);
	if (IS_ERR(r))
		return PTR_ERR(r);
	/* Routing changed concurrently. FIB event handler might have missed new
	 * entry, schedule update.
	 */
	if (tbl_time_before != tbl_time_after) {
		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
		if (err) {
			mlx5e_route_put_locked(priv, r);
			return err;
		}
	}

	flow->encap_routes[out_index].r = r;
	if (new_encap_entry)
		list_add(&e->route_list, &r->encap_entries);
	flow->encap_routes[out_index].index = out_index;
	return 0;
}

void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
			      struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_route_entry *r = flow->decap_route;

	if (!r)
		return;

	mutex_lock(&esw->offloads.encap_tbl_lock);
	list_del(&flow->decap_routes);
	flow->decap_route = NULL;

	if (!refcount_dec_and_test(&r->refcnt)) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		return;
	}
	hash_del_rcu(&r->hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_route_dealloc(priv, r);
}

static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
				     struct mlx5e_tc_flow *flow,
				     int out_index)
{
	struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_encap_entry *e, *tmp;

	if (!r)
		return;

	mutex_lock(&esw->offloads.encap_tbl_lock);
	flow->encap_routes[out_index].r = NULL;

	if (!refcount_dec_and_test(&r->refcnt)) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		return;
	}
	list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
		list_del_init(&e->route_list);
	hash_del_rcu(&r->hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_route_dealloc(priv, r);
}

static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
				   struct mlx5e_encap_entry *e,
				   struct list_head *encap_flows)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, encap_flows, tmp_list) {
		struct mlx5_flow_attr *attr = flow->attr;
		struct mlx5_esw_flow_attr *esw_attr;

		if (!mlx5e_is_offloaded_flow(flow))
			continue;
		esw_attr = attr->esw_attr;

		if (flow_flag_test(flow, SLOW))
			mlx5e_tc_unoffload_from_slow_path(esw, flow);
		else
			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
		mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr);
		attr->modify_hdr = NULL;

		esw_attr->dests[flow->tmp_entry_index].flags &=
			~MLX5_ESW_DEST_ENCAP_VALID;
		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
	}

	e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
		e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
		mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
		e->pkt_reformat = NULL;
	}
}
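
/* After a route update, rebuild the tunnel headers of the encap entry for
 * the new path and try to move its flows from the slow path back to
 * offloaded encap rules.
 */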
static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
				  struct net_device *tunnel_dev,
				  struct mlx5e_encap_entry *e,
				  struct list_head *encap_flows)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;
	int err;

	err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
	      mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
	      mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
	if (err)
		mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err);
	e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;

	list_for_each_entry(flow, encap_flows, tmp_list) {
		struct mlx5e_tc_flow_parse_attr *parse_attr;
		struct mlx5_flow_attr *attr = flow->attr;
		struct mlx5_esw_flow_attr *esw_attr;
		struct mlx5_flow_handle *rule;
		struct mlx5_flow_spec *spec;

		if (flow_flag_test(flow, FAILED))
			continue;

		esw_attr = attr->esw_attr;
		parse_attr = attr->parse_attr;
		spec = &parse_attr->spec;

		err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
					     e->out_dev, e->route_dev_ifindex,
					     flow->tmp_entry_index);
		if (err) {
			mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err);
			continue;
		}

		err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow);
		if (err) {
			mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
				       err);
			continue;
		}

		if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
			esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
			esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
			if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
				goto offload_to_slow_path;
			/* update from slow path rule to encap rule */
			rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
			if (IS_ERR(rule)) {
				err = PTR_ERR(rule);
				mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
					       err);
			} else {
				flow->rule[0] = rule;
			}
		} else {
offload_to_slow_path:
			rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
			/* mark the flow's encap dest as non-valid */
			esw_attr->dests[flow->tmp_entry_index].flags &=
				~MLX5_ESW_DEST_ENCAP_VALID;

			if (IS_ERR(rule)) {
				err = PTR_ERR(rule);
				mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
					       err);
			} else {
				flow->rule[0] = rule;
			}
		}
		flow_flag_set(flow, OFFLOADED);
	}
}

static int mlx5e_update_route_encaps(struct mlx5e_priv *priv,
				     struct mlx5e_route_entry *r,
				     struct list_head *flow_list,
				     bool replace)
{
	struct net_device *tunnel_dev;
	struct mlx5e_encap_entry *e;

	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
	if (!tunnel_dev)
		return -ENODEV;

	list_for_each_entry(e, &r->encap_entries, route_list) {
		LIST_HEAD(encap_flows);

		mlx5e_take_all_encap_flows(e, &encap_flows);
		if (list_empty(&encap_flows))
			continue;

		if (mlx5e_route_entry_valid(r))
			mlx5e_invalidate_encap(priv, e, &encap_flows);

		if (!replace) {
			list_splice(&encap_flows, flow_list);
			continue;
		}

		mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows);
		list_splice(&encap_flows, flow_list);
	}

	return 0;
}

static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv,
				      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, flow_list, tmp_list)
		if (mlx5e_is_offloaded_flow(flow))
			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
}
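
/* After a route update, redo the route lookup for each decap flow and
 * re-offload it; flows whose lookup fails stay unoffloaded until the next
 * FIB event.
 */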
static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
				  struct list_head *decap_flows)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, decap_flows, tmp_list) {
		struct mlx5e_tc_flow_parse_attr *parse_attr;
		struct mlx5_flow_attr *attr = flow->attr;
		struct mlx5_flow_handle *rule;
		struct mlx5_flow_spec *spec;
		int err;

		if (flow_flag_test(flow, FAILED))
			continue;

		parse_attr = attr->parse_attr;
		spec = &parse_attr->spec;
		err = mlx5e_tc_tun_route_lookup(priv, spec, attr);
		if (err) {
			mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
				       err);
			continue;
		}

		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
				       err);
		} else {
			flow->rule[0] = rule;
			flow_flag_set(flow, OFFLOADED);
		}
	}
}

static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
					  struct mlx5e_route_entry *r,
					  struct list_head *flow_list,
					  bool replace)
{
	struct net_device *tunnel_dev;
	LIST_HEAD(decap_flows);

	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
	if (!tunnel_dev)
		return -ENODEV;

	mlx5e_take_all_route_decap_flows(r, &decap_flows);
	if (mlx5e_route_entry_valid(r))
		mlx5e_unoffload_flow_list(priv, &decap_flows);
	if (replace)
		mlx5e_reoffload_decap(priv, &decap_flows);

	list_splice(&decap_flows, flow_list);

	return 0;
}

static void mlx5e_tc_fib_event_work(struct work_struct *work)
{
	struct mlx5e_tc_fib_event_data *event_data =
		container_of(work, struct mlx5e_tc_fib_event_data, work);
	struct net_device *ul_dev = event_data->ul_dev;
	struct mlx5e_priv *priv = netdev_priv(ul_dev);
	struct mlx5e_route_entry *r = event_data->r;
	struct mlx5_eswitch *esw;
	LIST_HEAD(flow_list);
	bool replace;
	int err;

	/* sync with concurrent neigh updates */
	rtnl_lock();
	esw = priv->mdev->priv.eswitch;
	mutex_lock(&esw->offloads.encap_tbl_lock);
	replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;

	if (!mlx5e_route_entry_valid(r) && !replace)
		goto out;

	err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
	if (err)
		mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
			       err);

	err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
	if (err)
		mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
			       err);

	if (replace)
		r->flags |= MLX5E_ROUTE_ENTRY_VALID;
out:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	rtnl_unlock();

	mlx5e_put_flow_list(priv, &flow_list);
	mlx5e_route_put(priv, event_data->r);
	dev_put(event_data->ul_dev);
	kfree(event_data);
}
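
/* The ipv4/ipv6 helpers below run in FIB notifier (atomic) context: they
 * only take references and allocate with GFP_ATOMIC, deferring all sleeping
 * work to the workqueue. Only host route (/32 and /128) events are of
 * interest.
 */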
static struct mlx5e_tc_fib_event_data *
mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
			 struct net_device *ul_dev,
			 struct mlx5e_tc_tun_encap *encap,
			 unsigned long event,
			 struct fib_notifier_info *info)
{
	struct fib_entry_notifier_info *fen_info;
	struct mlx5e_tc_fib_event_data *fib_work;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	struct net_device *fib_dev;

	fen_info = container_of(info, struct fib_entry_notifier_info, info);
	fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
	if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
	    fen_info->dst_len != 32)
		return NULL;

	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
	if (!fib_work)
		return ERR_PTR(-ENOMEM);

	key.endpoint_ip.v4 = htonl(fen_info->dst);
	key.ip_version = 4;

	/* Can't fail after this point because releasing reference to r
	 * requires obtaining sleeping mutex which we can't do in atomic
	 * context.
	 */
	r = mlx5e_route_lookup_for_update(encap, &key);
	if (!r)
		goto out;
	fib_work->r = r;
	dev_hold(ul_dev);

	return fib_work;

out:
	kfree(fib_work);
	return NULL;
}

static struct mlx5e_tc_fib_event_data *
mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
			 struct net_device *ul_dev,
			 struct mlx5e_tc_tun_encap *encap,
			 unsigned long event,
			 struct fib_notifier_info *info)
{
	struct fib6_entry_notifier_info *fen_info;
	struct mlx5e_tc_fib_event_data *fib_work;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	struct net_device *fib_dev;

	fen_info = container_of(info, struct fib6_entry_notifier_info, info);
	fib_dev = fib6_info_nh_dev(fen_info->rt);
	if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
	    fen_info->rt->fib6_dst.plen != 128)
		return NULL;

	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
	if (!fib_work)
		return ERR_PTR(-ENOMEM);

	memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
	       sizeof(fen_info->rt->fib6_dst.addr));
	key.ip_version = 6;

	/* Can't fail after this point because releasing reference to r
	 * requires obtaining sleeping mutex which we can't do in atomic
	 * context.
	 */
	r = mlx5e_route_lookup_for_update(encap, &key);
	if (!r)
		goto out;
	fib_work->r = r;
	dev_hold(ul_dev);

	return fib_work;

out:
	kfree(fib_work);
	return NULL;
}
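
/* FIB notifier callback: handle host route replace/delete events by queuing
 * a work item that revalidates every flow and encap entry depending on the
 * route.
 */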
static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
{
	struct mlx5e_tc_fib_event_data *fib_work;
	struct fib_notifier_info *info = ptr;
	struct mlx5e_tc_tun_encap *encap;
	struct net_device *ul_dev;
	struct mlx5e_priv *priv;

	encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
	priv = encap->priv;
	ul_dev = priv->netdev;
	priv = netdev_priv(ul_dev);

	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		if (info->family == AF_INET)
			fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
		else if (info->family == AF_INET6)
			fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
		else
			return NOTIFY_DONE;

		if (!IS_ERR_OR_NULL(fib_work)) {
			queue_work(priv->wq, &fib_work->work);
		} else if (IS_ERR(fib_work)) {
			NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
			mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
				       PTR_ERR(fib_work));
		}

		break;
	default:
		return NOTIFY_DONE;
	}

	return NOTIFY_DONE;
}

struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_tun_encap *encap;
	int err;

	encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
	if (!encap)
		return ERR_PTR(-ENOMEM);

	encap->priv = priv;
	encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
	spin_lock_init(&encap->route_lock);
	hash_init(encap->route_tbl);
	err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
				    NULL, NULL);
	if (err) {
		kvfree(encap);
		return ERR_PTR(err);
	}

	return encap;
}

void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
{
	if (!encap)
		return;

	unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
	flush_workqueue(encap->priv->wq); /* flush fib event works */
	kvfree(encap);
}