// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021 Mellanox Technologies. */

#include <net/fib_notifier.h>
#include <net/nexthop.h>
#include "tc_tun_encap.h"
#include "en_tc.h"
#include "tc_tun.h"
#include "rep/tc.h"
#include "diag/en_tc_tracepoint.h"

enum {
	MLX5E_ROUTE_ENTRY_VALID = BIT(0),
};

static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv,
				     struct mlx5_flow_attr *attr,
				     struct mlx5e_encap_entry *e,
				     int out_index)
{
	struct net_device *route_dev;
	int err = 0;

	route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex);

	if (!route_dev || !netif_is_ovs_master(route_dev))
		goto out;

	err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex,
						MLX5E_TC_INT_PORT_EGRESS,
						&attr->action, out_index);

out:
	if (route_dev)
		dev_put(route_dev);

	return err;
}

struct mlx5e_route_key {
	int ip_version;
	union {
		__be32 v4;
		struct in6_addr v6;
	} endpoint_ip;
};

struct mlx5e_route_entry {
	struct mlx5e_route_key key;
	struct list_head encap_entries;
	struct list_head decap_flows;
	u32 flags;
	struct hlist_node hlist;
	refcount_t refcnt;
	int tunnel_dev_index;
	struct rcu_head rcu;
};

struct mlx5e_tc_tun_encap {
	struct mlx5e_priv *priv;
	struct notifier_block fib_nb;
	spinlock_t route_lock; /* protects route_tbl */
	unsigned long route_tbl_last_update;
	DECLARE_HASHTABLE(route_tbl, 8);
};

static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
{
	return r->flags & MLX5E_ROUTE_ENTRY_VALID;
}

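/* Record the tunnel source/destination addresses from the flow match in
 * esw_attr->rx_tun_attr. TUN_RX is only flagged when both addresses are
 * present, since both are needed for the decap route lookup.
 */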
int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
			     struct mlx5_flow_spec *spec)
{
	struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
	struct mlx5_rx_tun_attr *tun_attr;
	void *daddr, *saddr;
	u8 ip_version;

	tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
	if (!tun_attr)
		return -ENOMEM;

	esw_attr->rx_tun_attr = tun_attr;
	ip_version = mlx5e_tc_get_ip_version(spec, true);

	if (ip_version == 4) {
		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
		tun_attr->dst_ip.v4 = *(__be32 *)daddr;
		tun_attr->src_ip.v4 = *(__be32 *)saddr;
		if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
			return 0;
	}
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
	else if (ip_version == 6) {
		int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
		struct in6_addr zerov6 = {};

		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
		memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
		memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
		if (!memcmp(&tun_attr->dst_ip.v6, &zerov6, sizeof(zerov6)) ||
		    !memcmp(&tun_attr->src_ip.v6, &zerov6, sizeof(zerov6)))
			return 0;
	}
#endif
	/* Only set the flag if both src and dst ip addresses exist. They are
	 * required to establish routing.
	 */
	flow_flag_set(flow, TUN_RX);
	flow->attr->tun_ip_version = ip_version;
	return 0;
}

static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
{
	bool all_flow_encaps_valid = true;
	int i;

	/* Flow can be associated with multiple encap entries.
	 * Before offloading the flow verify that all of them have
	 * a valid neighbour.
	 */
	for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
			continue;
		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
			all_flow_encaps_valid = false;
			break;
		}
	}

	return all_flow_encaps_valid;
}

void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
			      struct mlx5e_encap_entry *e,
			      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_pkt_reformat_params reformat_params;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_attr *attr;
	struct mlx5_flow_spec *spec;
	struct mlx5e_tc_flow *flow;
	int err;

	if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
		return;

	memset(&reformat_params, 0, sizeof(reformat_params));
	reformat_params.type = e->reformat_type;
	reformat_params.size = e->encap_size;
	reformat_params.data = e->encap_header;
	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
						     &reformat_params,
						     MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(e->pkt_reformat)) {
		mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
			       PTR_ERR(e->pkt_reformat));
		return;
	}
	e->flags |= MLX5_ENCAP_ENTRY_VALID;
	mlx5e_rep_queue_neigh_stats_work(priv);

	list_for_each_entry(flow, flow_list, tmp_list) {
		if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW))
			continue;

		spec = &flow->attr->parse_attr->spec;

		attr = mlx5e_tc_get_encap_attr(flow);
		esw_attr = attr->esw_attr;
		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
		esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;

		/* Do not offload flows with unresolved neighbors */
		if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
			continue;

		err = mlx5e_tc_offload_flow_post_acts(flow);
		if (err) {
			mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
				       err);
			continue;
		}

		/* update from slow path rule to encap rule */
		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
		if (IS_ERR(rule)) {
			mlx5e_tc_unoffload_flow_post_acts(flow);
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
				       err);
			continue;
		}

		mlx5e_tc_unoffload_from_slow_path(esw, flow);
		flow->rule[0] = rule;
		/* was unset when slow path rule removed */
		flow_flag_set(flow, OFFLOADED);
	}
}

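/* Called when the neighbour for this encap entry is no longer valid: move
 * every offloaded flow that uses it back to the slow path and release the
 * cached packet reformat object.
 */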
void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
			      struct mlx5e_encap_entry *e,
			      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_attr *attr;
	struct mlx5_flow_spec *spec;
	struct mlx5e_tc_flow *flow;
	int err;

	list_for_each_entry(flow, flow_list, tmp_list) {
		if (!mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, SLOW))
			continue;
		spec = &flow->attr->parse_attr->spec;

		/* update from encap rule to slow path rule */
		rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);

		attr = mlx5e_tc_get_encap_attr(flow);
		esw_attr = attr->esw_attr;
		/* mark the flow's encap dest as non-valid */
		esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;

		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
				       err);
			continue;
		}

		mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
		mlx5e_tc_unoffload_flow_post_acts(flow);
		flow->rule[0] = rule;
		/* was unset when fast path rule removed */
		flow_flag_set(flow, OFFLOADED);
	}

	/* we know that the encap is valid */
	e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
	mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
}

static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
				struct list_head *flow_list,
				int index)
{
	if (IS_ERR(mlx5e_flow_get(flow))) {
		/* Flow is being deleted concurrently. Wait for it to be
		 * unoffloaded from hardware, otherwise deleting encap will
		 * fail.
		 */
		wait_for_completion(&flow->del_hw_done);
		return;
	}
	wait_for_completion(&flow->init_done);

	flow->tmp_entry_index = index;
	list_add(&flow->tmp_list, flow_list);
}

/* Takes reference to all flows attached to encap and adds the flows to
 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
 */
void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
{
	struct encap_flow_item *efi;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(efi, &e->flows, list) {
		flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
		mlx5e_take_tmp_flow(flow, flow_list, efi->index);
	}
}

/* Takes reference to all flows attached to route and adds the flows to
 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
 */
static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
					     struct list_head *flow_list)
{
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, &r->decap_flows, decap_routes)
		mlx5e_take_tmp_flow(flow, flow_list, 0);
}

typedef bool (match_cb)(struct mlx5e_encap_entry *);

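/* Walk nhe->encap_list under RCU, starting after 'e' (or from the head when
 * 'e' is NULL), and return the first entry that a reference could be taken
 * on and that satisfies 'match'. The reference to the starting entry is
 * released, so callers can iterate entry-by-entry without external locking.
 */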
static struct mlx5e_encap_entry *
mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
			      struct mlx5e_encap_entry *e,
			      match_cb match)
{
	struct mlx5e_encap_entry *next = NULL;

retry:
	rcu_read_lock();

	/* find encap with non-zero reference counter value */
	for (next = e ?
		     list_next_or_null_rcu(&nhe->encap_list,
					   &e->encap_list,
					   struct mlx5e_encap_entry,
					   encap_list) :
		     list_first_or_null_rcu(&nhe->encap_list,
					    struct mlx5e_encap_entry,
					    encap_list);
	     next;
	     next = list_next_or_null_rcu(&nhe->encap_list,
					  &next->encap_list,
					  struct mlx5e_encap_entry,
					  encap_list))
		if (mlx5e_encap_take(next))
			break;

	rcu_read_unlock();

	/* release starting encap */
	if (e)
		mlx5e_encap_put(netdev_priv(e->out_dev), e);
	if (!next)
		return next;

	/* wait for encap to be fully initialized */
	wait_for_completion(&next->res_ready);
	/* continue searching if encap entry is not in valid state after completion */
	if (!match(next)) {
		e = next;
		goto retry;
	}

	return next;
}

static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
{
	return e->flags & MLX5_ENCAP_ENTRY_VALID;
}

static struct mlx5e_encap_entry *
mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
			   struct mlx5e_encap_entry *e)
{
	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
}

static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
{
	return e->compl_result >= 0;
}

struct mlx5e_encap_entry *
mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
			  struct mlx5e_encap_entry *e)
{
	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
}

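/* Check whether any offloaded flow encapsulating over this neighbour passed
 * traffic since the last report and, if so, kick the kernel neighbour entry
 * so it is not aged out while hardware is still using it.
 */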
void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
	struct mlx5e_encap_entry *e = NULL;
	struct mlx5e_tc_flow *flow;
	struct mlx5_fc *counter;
	struct neigh_table *tbl;
	bool neigh_used = false;
	struct neighbour *n;
	u64 lastuse;

	if (m_neigh->family == AF_INET)
		tbl = &arp_tbl;
#if IS_ENABLED(CONFIG_IPV6)
	else if (m_neigh->family == AF_INET6)
		tbl = ipv6_stub->nd_tbl;
#endif
	else
		return;

	/* mlx5e_get_next_valid_encap() releases previous encap before returning
	 * next one.
	 */
	while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
		struct mlx5e_priv *priv = netdev_priv(e->out_dev);
		struct encap_flow_item *efi, *tmp;
		struct mlx5_eswitch *esw;
		LIST_HEAD(flow_list);

		esw = priv->mdev->priv.eswitch;
		mutex_lock(&esw->offloads.encap_tbl_lock);
		list_for_each_entry_safe(efi, tmp, &e->flows, list) {
			flow = container_of(efi, struct mlx5e_tc_flow,
					    encaps[efi->index]);
			if (IS_ERR(mlx5e_flow_get(flow)))
				continue;
			list_add(&flow->tmp_list, &flow_list);

			if (mlx5e_is_offloaded_flow(flow)) {
				counter = mlx5e_tc_get_counter(flow);
				lastuse = mlx5_fc_query_lastuse(counter);
				if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
					neigh_used = true;
					break;
				}
			}
		}
		mutex_unlock(&esw->offloads.encap_tbl_lock);

		mlx5e_put_flow_list(priv, &flow_list);
		if (neigh_used) {
			/* release current encap before breaking the loop */
			mlx5e_encap_put(priv, e);
			break;
		}
	}

	trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);

	if (neigh_used) {
		nhe->reported_lastuse = jiffies;

		/* find the relevant neigh according to the cached device and
		 * dst ip pair
		 */
		n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
		if (!n)
			return;

		neigh_event_send(n, NULL);
		neigh_release(n);
	}
}

static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
	WARN_ON(!list_empty(&e->flows));

	if (e->compl_result > 0) {
		mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);

		if (e->flags & MLX5_ENCAP_ENTRY_VALID)
			mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
	}

	kfree(e->tun_info);
	kfree(e->encap_header);
	kfree_rcu(e, rcu);
}

static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
				struct mlx5e_decap_entry *d)
{
	WARN_ON(!list_empty(&d->flows));

	if (!d->compl_result)
		mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);

	kfree_rcu(d, rcu);
}

void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
		return;
	list_del(&e->route_list);
	hash_del_rcu(&e->encap_hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_encap_dealloc(priv, e);
}

static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
		return;
	hash_del_rcu(&d->hlist);
	mutex_unlock(&esw->offloads.decap_tbl_lock);

	mlx5e_decap_dealloc(priv, d);
}

static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
				     struct mlx5e_tc_flow *flow,
				     int out_index);

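/* Drop the flow's reference on the encap entry at 'out_index', freeing the
 * entry on the last reference, and detach the VF tunnel route when source
 * port rewrite was in use.
 */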
void mlx5e_detach_encap(struct mlx5e_priv *priv,
			struct mlx5e_tc_flow *flow,
			struct mlx5_flow_attr *attr,
			int out_index)
{
	struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!mlx5e_is_eswitch_flow(flow))
		return;

	if (attr->esw_attr->dests[out_index].flags &
	    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
		mlx5e_detach_encap_route(priv, flow, out_index);

	/* flow wasn't fully initialized */
	if (!e)
		return;

	mutex_lock(&esw->offloads.encap_tbl_lock);
	list_del(&flow->encaps[out_index].list);
	flow->encaps[out_index].e = NULL;
	if (!refcount_dec_and_test(&e->refcnt)) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		return;
	}
	list_del(&e->route_list);
	hash_del_rcu(&e->encap_hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_encap_dealloc(priv, e);
}

void mlx5e_detach_decap(struct mlx5e_priv *priv,
			struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_decap_entry *d = flow->decap_reformat;

	if (!d)
		return;

	mutex_lock(&esw->offloads.decap_tbl_lock);
	list_del(&flow->l3_to_l2_reformat);
	flow->decap_reformat = NULL;

	if (!refcount_dec_and_test(&d->refcnt)) {
		mutex_unlock(&esw->offloads.decap_tbl_lock);
		return;
	}
	hash_del_rcu(&d->hlist);
	mutex_unlock(&esw->offloads.decap_tbl_lock);

	mlx5e_decap_dealloc(priv, d);
}

bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
					   struct mlx5e_encap_key *b)
{
	return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 &&
		a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
}

static int cmp_decap_info(struct mlx5e_decap_key *a,
			  struct mlx5e_decap_key *b)
{
	return memcmp(&a->key, &b->key, sizeof(b->key));
}

static int hash_encap_info(struct mlx5e_encap_key *key)
{
	return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
		     key->tc_tunnel->tunnel_type);
}

static int hash_decap_info(struct mlx5e_decap_key *key)
{
	return jhash(&key->key, sizeof(key->key), 0);
}

bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
{
	return refcount_inc_not_zero(&e->refcnt);
}

static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
{
	return refcount_inc_not_zero(&e->refcnt);
}

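/* Look up an encap entry with a matching tunnel key in the eswitch encap
 * table and take a reference to it; returns NULL when no live entry matches.
 */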
static struct mlx5e_encap_entry *
mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key,
		uintptr_t hash_key)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_encap_key e_key;
	struct mlx5e_encap_entry *e;

	hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
				   encap_hlist, hash_key) {
		e_key.ip_tun_key = &e->tun_info->key;
		e_key.tc_tunnel = e->tunnel;
		if (e->tunnel->encap_info_equal(&e_key, key) &&
		    mlx5e_encap_take(e))
			return e;
	}

	return NULL;
}

static struct mlx5e_decap_entry *
mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
		uintptr_t hash_key)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_decap_key r_key;
	struct mlx5e_decap_entry *e;

	hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
				   hlist, hash_key) {
		r_key = e->key;
		if (!cmp_decap_info(&r_key, key) &&
		    mlx5e_decap_take(e))
			return e;
	}
	return NULL;
}

struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
{
	size_t tun_size = sizeof(*tun_info) + tun_info->options_len;

	return kmemdup(tun_info, tun_size, GFP_KERNEL);
}

static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
				      struct mlx5e_tc_flow *flow,
				      int out_index,
				      struct mlx5e_encap_entry *e,
				      struct netlink_ext_ack *extack)
{
	int i;

	for (i = 0; i < out_index; i++) {
		if (flow->encaps[i].e != e)
			continue;
		NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
		netdev_err(priv->netdev, "can't duplicate encap action\n");
		return true;
	}

	return false;
}

static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
			       struct mlx5_flow_attr *attr,
			       struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
			       struct net_device *out_dev,
			       int route_dev_ifindex,
			       int out_index)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
	struct net_device *route_dev;
	u16 vport_num;
	int err = 0;
	u32 data;

	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);

	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
		goto out;

	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
	if (err)
		goto out;

	attr->dest_chain = 0;
	attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
	esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
	data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
						       vport_num);
	err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
						   MLX5_FLOW_NAMESPACE_FDB,
						   VPORT_TO_REG, data);
	if (err >= 0) {
		esw_attr->dests[out_index].src_port_rewrite_act_id = err;
		err = 0;
	}

out:
	if (route_dev)
		dev_put(route_dev);
	return err;
}

static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
				  struct mlx5_esw_flow_attr *attr,
				  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
				  struct net_device *out_dev,
				  int route_dev_ifindex,
				  int out_index)
{
	int act_id = attr->dests[out_index].src_port_rewrite_act_id;
	struct net_device *route_dev;
	u16 vport_num;
	int err = 0;
	u32 data;

	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);

	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
		err = -ENODEV;
		goto out;
	}

	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
	if (err)
		goto out;

	data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
						       vport_num);
	mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);

out:
	if (route_dev)
		dev_put(route_dev);
	return err;
}

static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct mlx5e_tc_tun_encap *encap;
	unsigned int ret;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;
	encap = uplink_priv->encap;

	spin_lock_bh(&encap->route_lock);
	ret = encap->route_tbl_last_update;
	spin_unlock_bh(&encap->route_lock);
	return ret;
}

static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
				    struct mlx5e_tc_flow *flow,
				    struct mlx5_flow_attr *attr,
				    struct mlx5e_encap_entry *e,
				    bool new_encap_entry,
				    unsigned long tbl_time_before,
				    int out_index);

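/* Find or create the encap entry for the tunnel destination of the mirred
 * device at 'out_index' and attach the flow to it. On return *encap_dev is
 * the egress device and *encap_valid tells whether the neighbour is already
 * resolved; flows with unresolved encaps are offloaded via the slow path
 * until mlx5e_tc_encap_flows_add() promotes them.
 */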
int mlx5e_attach_encap(struct mlx5e_priv *priv,
		       struct mlx5e_tc_flow *flow,
		       struct mlx5_flow_attr *attr,
		       struct net_device *mirred_dev,
		       int out_index,
		       struct netlink_ext_ack *extack,
		       struct net_device **encap_dev,
		       bool *encap_valid)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	const struct ip_tunnel_info *tun_info;
	const struct mlx5e_mpls_info *mpls_info;
	unsigned long tbl_time_before = 0;
	struct mlx5e_encap_entry *e;
	struct mlx5e_encap_key key;
	bool entry_created = false;
	unsigned short family;
	uintptr_t hash_key;
	int err = 0;

	parse_attr = attr->parse_attr;
	tun_info = parse_attr->tun_info[out_index];
	mpls_info = &parse_attr->mpls_info[out_index];
	family = ip_tunnel_info_af(tun_info);
	key.ip_tun_key = &tun_info->key;
	key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
	if (!key.tc_tunnel) {
		NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
		return -EOPNOTSUPP;
	}

	hash_key = hash_encap_info(&key);

	mutex_lock(&esw->offloads.encap_tbl_lock);
	e = mlx5e_encap_get(priv, &key, hash_key);

	/* must verify if encap is valid or not */
	if (e) {
		/* Check that entry was not already attached to this flow */
		if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
			err = -EOPNOTSUPP;
			goto out_err;
		}

		mutex_unlock(&esw->offloads.encap_tbl_lock);
		wait_for_completion(&e->res_ready);

		/* Protect against concurrent neigh update. */
		mutex_lock(&esw->offloads.encap_tbl_lock);
		if (e->compl_result < 0) {
			err = -EREMOTEIO;
			goto out_err;
		}
		goto attach_flow;
	}

	e = kzalloc(sizeof(*e), GFP_KERNEL);
	if (!e) {
		err = -ENOMEM;
		goto out_err;
	}

	refcount_set(&e->refcnt, 1);
	init_completion(&e->res_ready);
	entry_created = true;
	INIT_LIST_HEAD(&e->route_list);

	tun_info = mlx5e_dup_tun_info(tun_info);
	if (!tun_info) {
		err = -ENOMEM;
		goto out_err_init;
	}
	e->tun_info = tun_info;
	memcpy(&e->mpls_info, mpls_info, sizeof(*mpls_info));
	err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
	if (err)
		goto out_err_init;

	INIT_LIST_HEAD(&e->flows);
	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	if (family == AF_INET)
		err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
	else if (family == AF_INET6)
		err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);

	/* Protect against concurrent neigh update. */
	mutex_lock(&esw->offloads.encap_tbl_lock);
	complete_all(&e->res_ready);
	if (err) {
		e->compl_result = err;
		goto out_err;
	}
	e->compl_result = 1;

attach_flow:
	err = mlx5e_attach_encap_route(priv, flow, attr, e, entry_created,
				       tbl_time_before, out_index);
	if (err)
		goto out_err;

	err = mlx5e_set_int_port_tunnel(priv, attr, e, out_index);
	if (err == -EOPNOTSUPP) {
		/* If device doesn't support int port offload,
		 * redirect to uplink vport.
		 */
		mlx5_core_dbg(priv->mdev, "attaching int port as encap dev not supported, using uplink\n");
		err = 0;
	} else if (err) {
		goto out_err;
	}

	flow->encaps[out_index].e = e;
	list_add(&flow->encaps[out_index].list, &e->flows);
	flow->encaps[out_index].index = out_index;
	*encap_dev = e->out_dev;
	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
		attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
		attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
		*encap_valid = true;
	} else {
		*encap_valid = false;
	}
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	return err;

out_err:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	if (e)
		mlx5e_encap_put(priv, e);
	return err;

out_err_init:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	kfree(tun_info);
	kfree(e);
	return err;
}

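/* Find or create the shared L3-to-L2 reformat context that restores the
 * inner Ethernet header after hardware decap of an L3 tunnel, and attach
 * the flow to it.
 */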
int mlx5e_attach_decap(struct mlx5e_priv *priv,
		       struct mlx5e_tc_flow *flow,
		       struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
	struct mlx5_pkt_reformat_params reformat_params;
	struct mlx5e_decap_entry *d;
	struct mlx5e_decap_key key;
	uintptr_t hash_key;
	int err = 0;

	if (sizeof(attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "encap header larger than max supported");
		return -EOPNOTSUPP;
	}

	key.key = attr->eth;
	hash_key = hash_decap_info(&key);
	mutex_lock(&esw->offloads.decap_tbl_lock);
	d = mlx5e_decap_get(priv, &key, hash_key);
	if (d) {
		mutex_unlock(&esw->offloads.decap_tbl_lock);
		wait_for_completion(&d->res_ready);
		mutex_lock(&esw->offloads.decap_tbl_lock);
		if (d->compl_result) {
			err = -EREMOTEIO;
			goto out_free;
		}
		goto found;
	}

	d = kzalloc(sizeof(*d), GFP_KERNEL);
	if (!d) {
		err = -ENOMEM;
		goto out_err;
	}

	d->key = key;
	refcount_set(&d->refcnt, 1);
	init_completion(&d->res_ready);
	INIT_LIST_HEAD(&d->flows);
	hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
	mutex_unlock(&esw->offloads.decap_tbl_lock);

	memset(&reformat_params, 0, sizeof(reformat_params));
	reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
	reformat_params.size = sizeof(attr->eth);
	reformat_params.data = &attr->eth;
	d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
						     &reformat_params,
						     MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(d->pkt_reformat)) {
		err = PTR_ERR(d->pkt_reformat);
		d->compl_result = err;
	}
	mutex_lock(&esw->offloads.decap_tbl_lock);
	complete_all(&d->res_ready);
	if (err)
		goto out_free;

found:
	flow->decap_reformat = d;
	attr->decap_pkt_reformat = d->pkt_reformat;
	list_add(&flow->l3_to_l2_reformat, &d->flows);
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	return 0;

out_free:
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	mlx5e_decap_put(priv, d);
	return err;

out_err:
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	return err;
}

static int cmp_route_info(struct mlx5e_route_key *a,
			  struct mlx5e_route_key *b)
{
	if (a->ip_version == 4 && b->ip_version == 4)
		return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
			      sizeof(a->endpoint_ip.v4));
	else if (a->ip_version == 6 && b->ip_version == 6)
		return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
			      sizeof(a->endpoint_ip.v6));
	return 1;
}

static u32 hash_route_info(struct mlx5e_route_key *key)
{
	if (key->ip_version == 4)
		return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
	return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
}

static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
				struct mlx5e_route_entry *r)
{
	WARN_ON(!list_empty(&r->decap_flows));
	WARN_ON(!list_empty(&r->encap_entries));

	kfree_rcu(r, rcu);
}

static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
		return;

	hash_del_rcu(&r->hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_route_dealloc(priv, r);
}

static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	lockdep_assert_held(&esw->offloads.encap_tbl_lock);

	if (!refcount_dec_and_test(&r->refcnt))
		return;
	hash_del_rcu(&r->hlist);
	mlx5e_route_dealloc(priv, r);
}

static struct mlx5e_route_entry *
mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
		u32 hash_key)
{
	struct mlx5e_route_key r_key;
	struct mlx5e_route_entry *r;

	hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
		r_key = r->key;
		if (!cmp_route_info(&r_key, key) &&
		    refcount_inc_not_zero(&r->refcnt))
			return r;
	}
	return NULL;
}

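/* Find or create a route entry keyed by the tunnel endpoint address. The
 * current route_tbl_last_update stamp is returned through
 * *route_tbl_change_time so callers can detect FIB events that raced with
 * creation of the entry.
 */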
static struct mlx5e_route_entry *
mlx5e_route_get_create(struct mlx5e_priv *priv,
		       struct mlx5e_route_key *key,
		       int tunnel_dev_index,
		       unsigned long *route_tbl_change_time)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct mlx5e_tc_tun_encap *encap;
	struct mlx5e_route_entry *r;
	u32 hash_key;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;
	encap = uplink_priv->encap;

	hash_key = hash_route_info(key);
	spin_lock_bh(&encap->route_lock);
	r = mlx5e_route_get(encap, key, hash_key);
	spin_unlock_bh(&encap->route_lock);
	if (r) {
		if (!mlx5e_route_entry_valid(r)) {
			mlx5e_route_put_locked(priv, r);
			return ERR_PTR(-EINVAL);
		}
		return r;
	}

	r = kzalloc(sizeof(*r), GFP_KERNEL);
	if (!r)
		return ERR_PTR(-ENOMEM);

	r->key = *key;
	r->flags |= MLX5E_ROUTE_ENTRY_VALID;
	r->tunnel_dev_index = tunnel_dev_index;
	refcount_set(&r->refcnt, 1);
	INIT_LIST_HEAD(&r->decap_flows);
	INIT_LIST_HEAD(&r->encap_entries);

	spin_lock_bh(&encap->route_lock);
	*route_tbl_change_time = encap->route_tbl_last_update;
	hash_add(encap->route_tbl, &r->hlist, hash_key);
	spin_unlock_bh(&encap->route_lock);

	return r;
}

static struct mlx5e_route_entry *
mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
{
	u32 hash_key = hash_route_info(key);
	struct mlx5e_route_entry *r;

	spin_lock_bh(&encap->route_lock);
	encap->route_tbl_last_update = jiffies;
	r = mlx5e_route_get(encap, key, hash_key);
	spin_unlock_bh(&encap->route_lock);

	return r;
}

struct mlx5e_tc_fib_event_data {
	struct work_struct work;
	unsigned long event;
	struct mlx5e_route_entry *r;
	struct net_device *ul_dev;
};

static void mlx5e_tc_fib_event_work(struct work_struct *work);
static struct mlx5e_tc_fib_event_data *
mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
{
	struct mlx5e_tc_fib_event_data *fib_work;

	fib_work = kzalloc(sizeof(*fib_work), flags);
	if (WARN_ON(!fib_work))
		return NULL;

	INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
	fib_work->event = event;
	fib_work->ul_dev = ul_dev;

	return fib_work;
}

static int
mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
			   struct mlx5e_route_entry *r,
			   unsigned long event)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_fib_event_data *fib_work;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct net_device *ul_dev;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	ul_dev = uplink_rpriv->netdev;

	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
	if (!fib_work)
		return -ENOMEM;

	dev_hold(ul_dev);
	refcount_inc(&r->refcnt);
	fib_work->r = r;
	queue_work(priv->wq, &fib_work->work);

	return 0;
}

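/* Resolve the route for a tunnel-RX (decap) flow and link the flow to the
 * matching route entry so that later FIB updates can re-offload it.
 */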
int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
			     struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	unsigned long tbl_time_before, tbl_time_after;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	int err = 0;

	esw_attr = attr->esw_attr;
	parse_attr = attr->parse_attr;
	mutex_lock(&esw->offloads.encap_tbl_lock);
	if (!esw_attr->rx_tun_attr)
		goto out;

	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
	tbl_time_after = tbl_time_before;
	err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr, parse_attr->filter_dev);
	if (err || !esw_attr->rx_tun_attr->decap_vport)
		goto out;

	key.ip_version = attr->tun_ip_version;
	if (key.ip_version == 4)
		key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
	else
		key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;

	r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
				   &tbl_time_after);
	if (IS_ERR(r)) {
		err = PTR_ERR(r);
		goto out;
	}
	/* Routing changed concurrently. FIB event handler might have missed new
	 * entry, schedule update.
	 */
	if (tbl_time_before != tbl_time_after) {
		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
		if (err) {
			mlx5e_route_put_locked(priv, r);
			goto out;
		}
	}

	flow->decap_route = r;
	list_add(&flow->decap_routes, &r->decap_flows);
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	return 0;

out:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	return err;
}

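/* For encaps that use source port rewrite (VF tunnel), link the flow and
 * encap entry to the route entry of the tunnel source address so that FIB
 * changes can re-resolve the header and re-offload the affected flows.
 */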
static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
				    struct mlx5e_tc_flow *flow,
				    struct mlx5_flow_attr *attr,
				    struct mlx5e_encap_entry *e,
				    bool new_encap_entry,
				    unsigned long tbl_time_before,
				    int out_index)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	unsigned long tbl_time_after = tbl_time_before;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	const struct ip_tunnel_info *tun_info;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	unsigned short family;
	int err = 0;

	esw_attr = attr->esw_attr;
	parse_attr = attr->parse_attr;
	tun_info = parse_attr->tun_info[out_index];
	family = ip_tunnel_info_af(tun_info);

	if (family == AF_INET) {
		key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
		key.ip_version = 4;
	} else if (family == AF_INET6) {
		key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
		key.ip_version = 6;
	}

	err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
				  e->route_dev_ifindex, out_index);
	if (err || !(esw_attr->dests[out_index].flags &
		     MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
		return err;

	r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
				   &tbl_time_after);
	if (IS_ERR(r))
		return PTR_ERR(r);
	/* Routing changed concurrently. FIB event handler might have missed new
	 * entry, schedule update.
	 */
	if (tbl_time_before != tbl_time_after) {
		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
		if (err) {
			mlx5e_route_put_locked(priv, r);
			return err;
		}
	}

	flow->encap_routes[out_index].r = r;
	if (new_encap_entry)
		list_add(&e->route_list, &r->encap_entries);
	flow->encap_routes[out_index].index = out_index;
	return 0;
}

void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
			      struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_route_entry *r = flow->decap_route;

	if (!r)
		return;

	mutex_lock(&esw->offloads.encap_tbl_lock);
	list_del(&flow->decap_routes);
	flow->decap_route = NULL;

	if (!refcount_dec_and_test(&r->refcnt)) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		return;
	}
	hash_del_rcu(&r->hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_route_dealloc(priv, r);
}

static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
				     struct mlx5e_tc_flow *flow,
				     int out_index)
{
	struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_encap_entry *e, *tmp;

	if (!r)
		return;

	mutex_lock(&esw->offloads.encap_tbl_lock);
	flow->encap_routes[out_index].r = NULL;

	if (!refcount_dec_and_test(&r->refcnt)) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		return;
	}
	list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
		list_del_init(&e->route_list);
	hash_del_rcu(&r->hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_route_dealloc(priv, r);
}

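/* The route used by this encap entry went away: unoffload all flows on it,
 * drop the reformat object and mark the entry NO_ROUTE so it is skipped
 * until a route appears again.
 */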
static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
				   struct mlx5e_encap_entry *e,
				   struct list_head *encap_flows)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, encap_flows, tmp_list) {
		struct mlx5_flow_attr *attr = flow->attr;
		struct mlx5_esw_flow_attr *esw_attr;

		if (!mlx5e_is_offloaded_flow(flow))
			continue;
		esw_attr = attr->esw_attr;

		if (flow_flag_test(flow, SLOW))
			mlx5e_tc_unoffload_from_slow_path(esw, flow);
		else
			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
		mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr);
		attr->modify_hdr = NULL;

		esw_attr->dests[flow->tmp_entry_index].flags &=
			~MLX5_ESW_DEST_ENCAP_VALID;
		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
	}

	e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
		e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
		mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
		e->pkt_reformat = NULL;
	}
}

static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
				  struct net_device *tunnel_dev,
				  struct mlx5e_encap_entry *e,
				  struct list_head *encap_flows)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;
	int err;

	err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
		mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
		mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
	if (err)
		mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err);
	e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;

	list_for_each_entry(flow, encap_flows, tmp_list) {
		struct mlx5e_tc_flow_parse_attr *parse_attr;
		struct mlx5_esw_flow_attr *esw_attr;
		struct mlx5_flow_handle *rule;
		struct mlx5_flow_attr *attr;
		struct mlx5_flow_spec *spec;

		if (flow_flag_test(flow, FAILED))
			continue;

		spec = &flow->attr->parse_attr->spec;

		attr = mlx5e_tc_get_encap_attr(flow);
		esw_attr = attr->esw_attr;
		parse_attr = attr->parse_attr;

		err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
					     e->out_dev, e->route_dev_ifindex,
					     flow->tmp_entry_index);
		if (err) {
			mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err);
			continue;
		}

		err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr);
		if (err) {
			mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
				       err);
			continue;
		}

		if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
			esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
			esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
			if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
				goto offload_to_slow_path;

			err = mlx5e_tc_offload_flow_post_acts(flow);
			if (err) {
				mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
					       err);
				goto offload_to_slow_path;
			}

			/* update from slow path rule to encap rule */
			rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
			if (IS_ERR(rule)) {
				mlx5e_tc_unoffload_flow_post_acts(flow);
				err = PTR_ERR(rule);
				mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
					       err);
			} else {
				flow->rule[0] = rule;
			}
		} else {
offload_to_slow_path:
			rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
			/* mark the flow's encap dest as non-valid */
			esw_attr->dests[flow->tmp_entry_index].flags &=
				~MLX5_ESW_DEST_ENCAP_VALID;

			if (IS_ERR(rule)) {
				err = PTR_ERR(rule);
				mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
					       err);
			} else {
				flow->rule[0] = rule;
			}
		}
		flow_flag_set(flow, OFFLOADED);
	}
}

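/* Apply a route change to every encap entry attached to the route entry:
 * invalidate current offloads and, when the event is a replace, re-resolve
 * the encap header and re-offload through the new tunnel device.
 */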
static int mlx5e_update_route_encaps(struct mlx5e_priv *priv,
				     struct mlx5e_route_entry *r,
				     struct list_head *flow_list,
				     bool replace)
{
	struct net_device *tunnel_dev;
	struct mlx5e_encap_entry *e;

	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
	if (!tunnel_dev)
		return -ENODEV;

	list_for_each_entry(e, &r->encap_entries, route_list) {
		LIST_HEAD(encap_flows);

		mlx5e_take_all_encap_flows(e, &encap_flows);
		if (list_empty(&encap_flows))
			continue;

		if (mlx5e_route_entry_valid(r))
			mlx5e_invalidate_encap(priv, e, &encap_flows);

		if (!replace) {
			list_splice(&encap_flows, flow_list);
			continue;
		}

		mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows);
		list_splice(&encap_flows, flow_list);
	}

	return 0;
}

static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv,
				      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, flow_list, tmp_list)
		if (mlx5e_is_offloaded_flow(flow))
			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
}

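/* Redo the route lookup and re-offload decap flows after a route change;
 * flows whose lookup fails are left unoffloaded.
 */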
static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
				  struct list_head *decap_flows)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, decap_flows, tmp_list) {
		struct mlx5e_tc_flow_parse_attr *parse_attr;
		struct mlx5_flow_attr *attr = flow->attr;
		struct mlx5_flow_handle *rule;
		struct mlx5_flow_spec *spec;
		int err;

		if (flow_flag_test(flow, FAILED))
			continue;

		parse_attr = attr->parse_attr;
		spec = &parse_attr->spec;
		err = mlx5e_tc_tun_route_lookup(priv, spec, attr, parse_attr->filter_dev);
		if (err) {
			mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
				       err);
			continue;
		}

		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
				       err);
		} else {
			flow->rule[0] = rule;
			flow_flag_set(flow, OFFLOADED);
		}
	}
}

static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
					  struct mlx5e_route_entry *r,
					  struct list_head *flow_list,
					  bool replace)
{
	struct net_device *tunnel_dev;
	LIST_HEAD(decap_flows);

	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
	if (!tunnel_dev)
		return -ENODEV;

	mlx5e_take_all_route_decap_flows(r, &decap_flows);
	if (mlx5e_route_entry_valid(r))
		mlx5e_unoffload_flow_list(priv, &decap_flows);
	if (replace)
		mlx5e_reoffload_decap(priv, &decap_flows);

	list_splice(&decap_flows, flow_list);

	return 0;
}

static void mlx5e_tc_fib_event_work(struct work_struct *work)
{
	struct mlx5e_tc_fib_event_data *event_data =
		container_of(work, struct mlx5e_tc_fib_event_data, work);
	struct net_device *ul_dev = event_data->ul_dev;
	struct mlx5e_priv *priv = netdev_priv(ul_dev);
	struct mlx5e_route_entry *r = event_data->r;
	struct mlx5_eswitch *esw;
	LIST_HEAD(flow_list);
	bool replace;
	int err;

	/* sync with concurrent neigh updates */
	rtnl_lock();
	esw = priv->mdev->priv.eswitch;
	mutex_lock(&esw->offloads.encap_tbl_lock);
	replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;

	if (!mlx5e_route_entry_valid(r) && !replace)
		goto out;

	err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
	if (err)
		mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
			       err);

	err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
	if (err)
		mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
			       err);

	if (replace)
		r->flags |= MLX5E_ROUTE_ENTRY_VALID;
out:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	rtnl_unlock();

	mlx5e_put_flow_list(priv, &flow_list);
	mlx5e_route_put(priv, event_data->r);
	dev_put(event_data->ul_dev);
	kfree(event_data);
}

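/* Runs in atomic notifier context: filter out FIB events that cannot affect
 * offloaded tunnels (only /32 host routes via an mlx5e netdev matter) and
 * prepare the deferred work item.
 */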
static struct mlx5e_tc_fib_event_data *
mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
			 struct net_device *ul_dev,
			 struct mlx5e_tc_tun_encap *encap,
			 unsigned long event,
			 struct fib_notifier_info *info)
{
	struct fib_entry_notifier_info *fen_info;
	struct mlx5e_tc_fib_event_data *fib_work;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	struct net_device *fib_dev;

	fen_info = container_of(info, struct fib_entry_notifier_info, info);
	if (fen_info->fi->nh)
		return NULL;
	fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
	if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
	    fen_info->dst_len != 32)
		return NULL;

	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
	if (!fib_work)
		return ERR_PTR(-ENOMEM);

	key.endpoint_ip.v4 = htonl(fen_info->dst);
	key.ip_version = 4;

	/* Can't fail after this point because releasing reference to r
	 * requires obtaining sleeping mutex which we can't do in atomic
	 * context.
	 */
	r = mlx5e_route_lookup_for_update(encap, &key);
	if (!r)
		goto out;
	fib_work->r = r;
	dev_hold(ul_dev);

	return fib_work;

out:
	kfree(fib_work);
	return NULL;
}

static struct mlx5e_tc_fib_event_data *
mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
			 struct net_device *ul_dev,
			 struct mlx5e_tc_tun_encap *encap,
			 unsigned long event,
			 struct fib_notifier_info *info)
{
	struct fib6_entry_notifier_info *fen_info;
	struct mlx5e_tc_fib_event_data *fib_work;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	struct net_device *fib_dev;

	fen_info = container_of(info, struct fib6_entry_notifier_info, info);
	fib_dev = fib6_info_nh_dev(fen_info->rt);
	if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
	    fen_info->rt->fib6_dst.plen != 128)
		return NULL;

	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
	if (!fib_work)
		return ERR_PTR(-ENOMEM);

	memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
	       sizeof(fen_info->rt->fib6_dst.addr));
	key.ip_version = 6;

	/* Can't fail after this point because releasing reference to r
	 * requires obtaining sleeping mutex which we can't do in atomic
	 * context.
	 */
	r = mlx5e_route_lookup_for_update(encap, &key);
	if (!r)
		goto out;
	fib_work->r = r;
	dev_hold(ul_dev);

	return fib_work;

out:
	kfree(fib_work);
	return NULL;
}

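/* FIB notifier callback: on host route replace/delete, take a reference to
 * the affected route entry and queue mlx5e_tc_fib_event_work() to update
 * offloads in process context.
 */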
static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
{
	struct mlx5e_tc_fib_event_data *fib_work;
	struct fib_notifier_info *info = ptr;
	struct mlx5e_tc_tun_encap *encap;
	struct net_device *ul_dev;
	struct mlx5e_priv *priv;

	encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
	priv = encap->priv;
	ul_dev = priv->netdev;
	priv = netdev_priv(ul_dev);

	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		if (info->family == AF_INET)
			fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
		else if (info->family == AF_INET6)
			fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
		else
			return NOTIFY_DONE;

		if (!IS_ERR_OR_NULL(fib_work)) {
			queue_work(priv->wq, &fib_work->work);
		} else if (IS_ERR(fib_work)) {
			NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
			mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
				       PTR_ERR(fib_work));
		}

		break;
	default:
		return NOTIFY_DONE;
	}

	return NOTIFY_DONE;
}

struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_tun_encap *encap;
	int err;

	encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
	if (!encap)
		return ERR_PTR(-ENOMEM);

	encap->priv = priv;
	encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
	spin_lock_init(&encap->route_lock);
	hash_init(encap->route_tbl);
	err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
				    NULL, NULL);
	if (err) {
		kvfree(encap);
		return ERR_PTR(err);
	}

	return encap;
}

void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
{
	if (!encap)
		return;

	unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
	flush_workqueue(encap->priv->wq); /* flush fib event works */
	kvfree(encap);
}