// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021 Mellanox Technologies. */

#include <net/fib_notifier.h>
#include <net/nexthop.h>
#include "tc_tun_encap.h"
#include "en_tc.h"
#include "tc_tun.h"
#include "rep/tc.h"
#include "diag/en_tc_tracepoint.h"

enum {
	MLX5E_ROUTE_ENTRY_VALID = BIT(0),
};

static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv,
				     struct mlx5_flow_attr *attr,
				     struct mlx5e_encap_entry *e,
				     int out_index)
{
	struct net_device *route_dev;
	int err = 0;

	route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex);

	if (!route_dev || !netif_is_ovs_master(route_dev))
		goto out;

	err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex,
						MLX5E_TC_INT_PORT_EGRESS,
						&attr->action, out_index);

out:
	if (route_dev)
		dev_put(route_dev);

	return err;
}

struct mlx5e_route_key {
	int ip_version;
	union {
		__be32 v4;
		struct in6_addr v6;
	} endpoint_ip;
};

struct mlx5e_route_entry {
	struct mlx5e_route_key key;
	struct list_head encap_entries;
	struct list_head decap_flows;
	u32 flags;
	struct hlist_node hlist;
	refcount_t refcnt;
	int tunnel_dev_index;
	struct rcu_head rcu;
};

struct mlx5e_tc_tun_encap {
	struct mlx5e_priv *priv;
	struct notifier_block fib_nb;
	spinlock_t route_lock; /* protects route_tbl */
	unsigned long route_tbl_last_update;
	DECLARE_HASHTABLE(route_tbl, 8);
};

static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
{
	return r->flags & MLX5E_ROUTE_ENTRY_VALID;
}
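
/* Illustrative note (not part of the driver): route entries are keyed by the
 * tunnel endpoint IP only. A hypothetical lookup of a v4 endpoint 'daddr'
 * would build the key as
 *
 *	struct mlx5e_route_key key = {
 *		.ip_version	 = 4,
 *		.endpoint_ip.v4	 = daddr,
 *	};
 *
 * hash it with hash_route_info() (defined below) and walk route_tbl under
 * route_lock.
 */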

int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
			     struct mlx5_flow_spec *spec)
{
	struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
	struct mlx5_rx_tun_attr *tun_attr;
	void *daddr, *saddr;
	u8 ip_version;

	tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
	if (!tun_attr)
		return -ENOMEM;

	esw_attr->rx_tun_attr = tun_attr;
	ip_version = mlx5e_tc_get_ip_version(spec, true);

	if (ip_version == 4) {
		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
		tun_attr->dst_ip.v4 = *(__be32 *)daddr;
		tun_attr->src_ip.v4 = *(__be32 *)saddr;
		if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
			return 0;
	}
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
	else if (ip_version == 6) {
		int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
		struct in6_addr zerov6 = {};

		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
		memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
		memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
		if (!memcmp(&tun_attr->dst_ip.v6, &zerov6, sizeof(zerov6)) ||
		    !memcmp(&tun_attr->src_ip.v6, &zerov6, sizeof(zerov6)))
			return 0;
	}
#endif
	/* Only set the flag if both src and dst ip addresses exist. They are
	 * required to establish routing.
	 */
	flow_flag_set(flow, TUN_RX);
	flow->attr->tun_ip_version = ip_version;
	return 0;
}

static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
{
	bool all_flow_encaps_valid = true;
	int i;

	/* Flow can be associated with multiple encap entries.
	 * Before offloading the flow verify that all of them have
	 * a valid neighbour.
	 */
	for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
			continue;
		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
			all_flow_encaps_valid = false;
			break;
		}
	}

	return all_flow_encaps_valid;
}

void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
			      struct mlx5e_encap_entry *e,
			      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_pkt_reformat_params reformat_params;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_attr *attr;
	struct mlx5_flow_spec *spec;
	struct mlx5e_tc_flow *flow;
	int err;

	if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
		return;

	memset(&reformat_params, 0, sizeof(reformat_params));
	reformat_params.type = e->reformat_type;
	reformat_params.size = e->encap_size;
	reformat_params.data = e->encap_header;
	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
						     &reformat_params,
						     MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(e->pkt_reformat)) {
		mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
			       PTR_ERR(e->pkt_reformat));
		return;
	}
	e->flags |= MLX5_ENCAP_ENTRY_VALID;
	mlx5e_rep_queue_neigh_stats_work(priv);

	list_for_each_entry(flow, flow_list, tmp_list) {
		if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW))
			continue;
		attr = flow->attr;
		esw_attr = attr->esw_attr;
		spec = &attr->parse_attr->spec;

		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
		esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;

		/* Do not offload flows with unresolved neighbors */
		if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
			continue;
		/* update from slow path rule to encap rule */
		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
				       err);
			continue;
		}

		mlx5e_tc_unoffload_from_slow_path(esw, flow);
		flow->rule[0] = rule;
		/* was unset when slow path rule removed */
		flow_flag_set(flow, OFFLOADED);
	}
}
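
/* Illustrative note (not part of the driver): mlx5e_tc_encap_flows_add()
 * above and mlx5e_tc_encap_flows_del() below are the two halves of
 * neigh-driven rule migration. When a neighbour becomes valid, flows parked
 * on the slow path are promoted to full encap rules; when it is lost, the
 * reverse happens and the shared pkt_reformat object is released. Both walk
 * a temporary flow list built with mlx5e_take_all_encap_flows() (defined
 * below).
 */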

void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
			      struct mlx5e_encap_entry *e,
			      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_attr *attr;
	struct mlx5_flow_spec *spec;
	struct mlx5e_tc_flow *flow;
	int err;

	list_for_each_entry(flow, flow_list, tmp_list) {
		if (!mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, SLOW))
			continue;
		attr = flow->attr;
		esw_attr = attr->esw_attr;
		spec = &attr->parse_attr->spec;

		/* update from encap rule to slow path rule */
		rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
		/* mark the flow's encap dest as non-valid */
		esw_attr->dests[flow->tmp_entry_index].flags &=
			~MLX5_ESW_DEST_ENCAP_VALID;

		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
				       err);
			continue;
		}

		mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
		flow->rule[0] = rule;
		/* was unset when fast path rule removed */
		flow_flag_set(flow, OFFLOADED);
	}

	/* we know that the encap is valid */
	e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
	mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
}

static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
				struct list_head *flow_list,
				int index)
{
	if (IS_ERR(mlx5e_flow_get(flow))) {
		/* Flow is being deleted concurrently. Wait for it to be
		 * unoffloaded from hardware, otherwise deleting encap will
		 * fail.
		 */
		wait_for_completion(&flow->del_hw_done);
		return;
	}
	wait_for_completion(&flow->init_done);

	flow->tmp_entry_index = index;
	list_add(&flow->tmp_list, flow_list);
}

/* Takes reference to all flows attached to encap and adds the flows to
 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
 */
void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
{
	struct encap_flow_item *efi;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(efi, &e->flows, list) {
		flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
		mlx5e_take_tmp_flow(flow, flow_list, efi->index);
	}
}

/* Takes reference to all flows attached to route and adds the flows to
 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
 */
static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
					     struct list_head *flow_list)
{
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, &r->decap_flows, decap_routes)
		mlx5e_take_tmp_flow(flow, flow_list, 0);
}
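
/* Illustrative sketch (not part of the driver): a typical caller pairs the
 * "take all" helpers above with mlx5e_put_flow_list() to safely walk flows
 * that may be deleted concurrently, e.g.:
 *
 *	LIST_HEAD(flow_list);
 *
 *	mlx5e_take_all_encap_flows(e, &flow_list);
 *	... operate on the flows via their tmp_list linkage ...
 *	mlx5e_put_flow_list(priv, &flow_list);
 */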

typedef bool (match_cb)(struct mlx5e_encap_entry *);

static struct mlx5e_encap_entry *
mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
			      struct mlx5e_encap_entry *e,
			      match_cb match)
{
	struct mlx5e_encap_entry *next = NULL;

retry:
	rcu_read_lock();

	/* find encap with non-zero reference counter value */
	for (next = e ?
		     list_next_or_null_rcu(&nhe->encap_list,
					   &e->encap_list,
					   struct mlx5e_encap_entry,
					   encap_list) :
		     list_first_or_null_rcu(&nhe->encap_list,
					    struct mlx5e_encap_entry,
					    encap_list);
	     next;
	     next = list_next_or_null_rcu(&nhe->encap_list,
					  &next->encap_list,
					  struct mlx5e_encap_entry,
					  encap_list))
		if (mlx5e_encap_take(next))
			break;

	rcu_read_unlock();

	/* release starting encap */
	if (e)
		mlx5e_encap_put(netdev_priv(e->out_dev), e);
	if (!next)
		return next;

	/* wait for encap to be fully initialized */
	wait_for_completion(&next->res_ready);
	/* continue searching if encap entry is not in valid state after completion */
	if (!match(next)) {
		e = next;
		goto retry;
	}

	return next;
}

static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
{
	return e->flags & MLX5_ENCAP_ENTRY_VALID;
}

static struct mlx5e_encap_entry *
mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
			   struct mlx5e_encap_entry *e)
{
	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
}

static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
{
	return e->compl_result >= 0;
}

struct mlx5e_encap_entry *
mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
			  struct mlx5e_encap_entry *e)
{
	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
}
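
/* Illustrative note (not part of the driver): the matching-encap getters
 * above form a reference-counted iterator. Passing in the previously
 * returned entry releases it, so a full traversal needs no explicit cleanup
 * except on early exit, e.g.:
 *
 *	struct mlx5e_encap_entry *e = NULL;
 *
 *	while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
 *		if (done) {	// 'done' is a hypothetical exit condition
 *			mlx5e_encap_put(netdev_priv(e->out_dev), e);
 *			break;
 *		}
 *	}
 */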

void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
	struct mlx5e_encap_entry *e = NULL;
	struct mlx5e_tc_flow *flow;
	struct mlx5_fc *counter;
	struct neigh_table *tbl;
	bool neigh_used = false;
	struct neighbour *n;
	u64 lastuse;

	if (m_neigh->family == AF_INET)
		tbl = &arp_tbl;
#if IS_ENABLED(CONFIG_IPV6)
	else if (m_neigh->family == AF_INET6)
		tbl = ipv6_stub->nd_tbl;
#endif
	else
		return;

	/* mlx5e_get_next_valid_encap() releases previous encap before returning
	 * next one.
	 */
	while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
		struct mlx5e_priv *priv = netdev_priv(e->out_dev);
		struct encap_flow_item *efi, *tmp;
		struct mlx5_eswitch *esw;
		LIST_HEAD(flow_list);

		esw = priv->mdev->priv.eswitch;
		mutex_lock(&esw->offloads.encap_tbl_lock);
		list_for_each_entry_safe(efi, tmp, &e->flows, list) {
			flow = container_of(efi, struct mlx5e_tc_flow,
					    encaps[efi->index]);
			if (IS_ERR(mlx5e_flow_get(flow)))
				continue;
			list_add(&flow->tmp_list, &flow_list);

			if (mlx5e_is_offloaded_flow(flow)) {
				counter = mlx5e_tc_get_counter(flow);
				lastuse = mlx5_fc_query_lastuse(counter);
				if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
					neigh_used = true;
					break;
				}
			}
		}
		mutex_unlock(&esw->offloads.encap_tbl_lock);

		mlx5e_put_flow_list(priv, &flow_list);
		if (neigh_used) {
			/* release current encap before breaking the loop */
			mlx5e_encap_put(priv, e);
			break;
		}
	}

	trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);

	if (neigh_used) {
		nhe->reported_lastuse = jiffies;

		/* find the relevant neigh according to the cached device and
		 * dst ip pair
		 */
		n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
		if (!n)
			return;

		neigh_event_send(n, NULL);
		neigh_release(n);
	}
}

static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
	WARN_ON(!list_empty(&e->flows));

	if (e->compl_result > 0) {
		mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);

		if (e->flags & MLX5_ENCAP_ENTRY_VALID)
			mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
	}

	kfree(e->tun_info);
	kfree(e->encap_header);
	kfree_rcu(e, rcu);
}

static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
				struct mlx5e_decap_entry *d)
{
	WARN_ON(!list_empty(&d->flows));

	if (!d->compl_result)
		mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);

	kfree_rcu(d, rcu);
}

void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
		return;
	list_del(&e->route_list);
	hash_del_rcu(&e->encap_hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_encap_dealloc(priv, e);
}

static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
		return;
	hash_del_rcu(&d->hlist);
	mutex_unlock(&esw->offloads.decap_tbl_lock);

	mlx5e_decap_dealloc(priv, d);
}
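
/* Illustrative note (not part of the driver): the put helpers above rely on
 * refcount_dec_and_mutex_lock(), which returns with the table lock held only
 * when the count actually drops to zero. The last reference holder therefore
 * unhashes the entry under the lock and frees it after an RCU grace period
 * (kfree_rcu() in the dealloc helpers), so RCU readers of the hash tables
 * never see freed memory.
 */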

static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
				     struct mlx5e_tc_flow *flow,
				     int out_index);

void mlx5e_detach_encap(struct mlx5e_priv *priv,
			struct mlx5e_tc_flow *flow, int out_index)
{
	struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (flow->attr->esw_attr->dests[out_index].flags &
	    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
		mlx5e_detach_encap_route(priv, flow, out_index);

	/* flow wasn't fully initialized */
	if (!e)
		return;

	mutex_lock(&esw->offloads.encap_tbl_lock);
	list_del(&flow->encaps[out_index].list);
	flow->encaps[out_index].e = NULL;
	if (!refcount_dec_and_test(&e->refcnt)) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		return;
	}
	list_del(&e->route_list);
	hash_del_rcu(&e->encap_hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_encap_dealloc(priv, e);
}

void mlx5e_detach_decap(struct mlx5e_priv *priv,
			struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_decap_entry *d = flow->decap_reformat;

	if (!d)
		return;

	mutex_lock(&esw->offloads.decap_tbl_lock);
	list_del(&flow->l3_to_l2_reformat);
	flow->decap_reformat = NULL;

	if (!refcount_dec_and_test(&d->refcnt)) {
		mutex_unlock(&esw->offloads.decap_tbl_lock);
		return;
	}
	hash_del_rcu(&d->hlist);
	mutex_unlock(&esw->offloads.decap_tbl_lock);

	mlx5e_decap_dealloc(priv, d);
}

bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
					   struct mlx5e_encap_key *b)
{
	return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 &&
		a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
}

static int cmp_decap_info(struct mlx5e_decap_key *a,
			  struct mlx5e_decap_key *b)
{
	return memcmp(&a->key, &b->key, sizeof(b->key));
}

static int hash_encap_info(struct mlx5e_encap_key *key)
{
	return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
		     key->tc_tunnel->tunnel_type);
}

static int hash_decap_info(struct mlx5e_decap_key *key)
{
	return jhash(&key->key, sizeof(key->key), 0);
}

bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
{
	return refcount_inc_not_zero(&e->refcnt);
}

static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
{
	return refcount_inc_not_zero(&e->refcnt);
}

static struct mlx5e_encap_entry *
mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key,
		uintptr_t hash_key)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_encap_key e_key;
	struct mlx5e_encap_entry *e;

	hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
				   encap_hlist, hash_key) {
		e_key.ip_tun_key = &e->tun_info->key;
		e_key.tc_tunnel = e->tunnel;
		if (e->tunnel->encap_info_equal(&e_key, key) &&
		    mlx5e_encap_take(e))
			return e;
	}

	return NULL;
}

static struct mlx5e_decap_entry *
mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
		uintptr_t hash_key)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_decap_key r_key;
	struct mlx5e_decap_entry *e;

	hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
				   hlist, hash_key) {
		r_key = e->key;
		if (!cmp_decap_info(&r_key, key) &&
		    mlx5e_decap_take(e))
			return e;
	}
	return NULL;
}

struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
{
	size_t tun_size = sizeof(*tun_info) + tun_info->options_len;

	return kmemdup(tun_info, tun_size, GFP_KERNEL);
}
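
/* Illustrative note (not part of the driver): mlx5e_encap_get() and
 * mlx5e_decap_get() implement the common "lookup and take" idiom: an entry
 * is only returned if refcount_inc_not_zero() succeeds, so an entry that a
 * concurrent put has already dropped to zero references is skipped rather
 * than resurrected.
 */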

static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
				      struct mlx5e_tc_flow *flow,
				      int out_index,
				      struct mlx5e_encap_entry *e,
				      struct netlink_ext_ack *extack)
{
	int i;

	for (i = 0; i < out_index; i++) {
		if (flow->encaps[i].e != e)
			continue;
		NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
		netdev_err(priv->netdev, "can't duplicate encap action\n");
		return true;
	}

	return false;
}

static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
			       struct mlx5_flow_attr *attr,
			       struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
			       struct net_device *out_dev,
			       int route_dev_ifindex,
			       int out_index)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
	struct net_device *route_dev;
	u16 vport_num;
	int err = 0;
	u32 data;

	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);

	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
		goto out;

	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
	if (err)
		goto out;

	attr->dest_chain = 0;
	attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
	esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
	data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
						       vport_num);
	err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
						   MLX5_FLOW_NAMESPACE_FDB,
						   VPORT_TO_REG, data);
	if (err >= 0) {
		esw_attr->dests[out_index].src_port_rewrite_act_id = err;
		err = 0;
	}

out:
	if (route_dev)
		dev_put(route_dev);
	return err;
}

static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
				  struct mlx5_esw_flow_attr *attr,
				  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
				  struct net_device *out_dev,
				  int route_dev_ifindex,
				  int out_index)
{
	int act_id = attr->dests[out_index].src_port_rewrite_act_id;
	struct net_device *route_dev;
	u16 vport_num;
	int err = 0;
	u32 data;

	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);

	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
		err = -ENODEV;
		goto out;
	}

	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
	if (err)
		goto out;

	data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
						       vport_num);
	mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);

out:
	if (route_dev)
		dev_put(route_dev);
	return err;
}

static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct mlx5e_tc_tun_encap *encap;
	unsigned int ret;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;
	encap = uplink_priv->encap;

	spin_lock_bh(&encap->route_lock);
	ret = encap->route_tbl_last_update;
	spin_unlock_bh(&encap->route_lock);
	return ret;
}
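
/* Illustrative note (not part of the driver): route_tbl_last_update is a
 * jiffies snapshot used to detect FIB churn. Attach paths sample it before
 * creating a route entry and compare it with the value recorded at entry
 * creation; if the two differ, a FIB event may have raced with the attach
 * and an explicit update is scheduled via mlx5e_route_enqueue_update()
 * (defined below).
 */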

static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
				    struct mlx5e_tc_flow *flow,
				    struct mlx5e_encap_entry *e,
				    bool new_encap_entry,
				    unsigned long tbl_time_before,
				    int out_index);

int mlx5e_attach_encap(struct mlx5e_priv *priv,
		       struct mlx5e_tc_flow *flow,
		       struct net_device *mirred_dev,
		       int out_index,
		       struct netlink_ext_ack *extack,
		       struct net_device **encap_dev,
		       bool *encap_valid)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	const struct ip_tunnel_info *tun_info;
	unsigned long tbl_time_before = 0;
	struct mlx5e_encap_entry *e;
	struct mlx5e_encap_key key;
	bool entry_created = false;
	unsigned short family;
	uintptr_t hash_key;
	int err = 0;

	parse_attr = attr->parse_attr;
	tun_info = parse_attr->tun_info[out_index];
	family = ip_tunnel_info_af(tun_info);
	key.ip_tun_key = &tun_info->key;
	key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
	if (!key.tc_tunnel) {
		NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
		return -EOPNOTSUPP;
	}

	hash_key = hash_encap_info(&key);

	mutex_lock(&esw->offloads.encap_tbl_lock);
	e = mlx5e_encap_get(priv, &key, hash_key);

	/* must verify if encap is valid or not */
	if (e) {
		/* Check that entry was not already attached to this flow */
		if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
			err = -EOPNOTSUPP;
			goto out_err;
		}

		mutex_unlock(&esw->offloads.encap_tbl_lock);
		wait_for_completion(&e->res_ready);

		/* Protect against concurrent neigh update. */
		mutex_lock(&esw->offloads.encap_tbl_lock);
		if (e->compl_result < 0) {
			err = -EREMOTEIO;
			goto out_err;
		}
		goto attach_flow;
	}

	e = kzalloc(sizeof(*e), GFP_KERNEL);
	if (!e) {
		err = -ENOMEM;
		goto out_err;
	}

	refcount_set(&e->refcnt, 1);
	init_completion(&e->res_ready);
	entry_created = true;
	INIT_LIST_HEAD(&e->route_list);

	tun_info = mlx5e_dup_tun_info(tun_info);
	if (!tun_info) {
		err = -ENOMEM;
		goto out_err_init;
	}
	e->tun_info = tun_info;
	err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
	if (err)
		goto out_err_init;

	INIT_LIST_HEAD(&e->flows);
	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	if (family == AF_INET)
		err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
	else if (family == AF_INET6)
		err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);

	/* Protect against concurrent neigh update. */
	mutex_lock(&esw->offloads.encap_tbl_lock);
	complete_all(&e->res_ready);
	if (err) {
		e->compl_result = err;
		goto out_err;
	}
	e->compl_result = 1;

attach_flow:
	err = mlx5e_attach_encap_route(priv, flow, e, entry_created, tbl_time_before,
				       out_index);
	if (err)
		goto out_err;

	err = mlx5e_set_int_port_tunnel(priv, attr, e, out_index);
	if (err == -EOPNOTSUPP) {
		/* If device doesn't support int port offload,
		 * redirect to uplink vport.
		 */
		mlx5_core_dbg(priv->mdev, "attaching int port as encap dev not supported, using uplink\n");
		err = 0;
	} else if (err) {
		goto out_err;
	}

	flow->encaps[out_index].e = e;
	list_add(&flow->encaps[out_index].list, &e->flows);
	flow->encaps[out_index].index = out_index;
	*encap_dev = e->out_dev;
	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
		attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
		attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
		*encap_valid = true;
	} else {
		*encap_valid = false;
	}
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	return err;

out_err:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	if (e)
		mlx5e_encap_put(priv, e);
	return err;

out_err_init:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	kfree(tun_info);
	kfree(e);
	return err;
}
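
/* Illustrative note (not part of the driver): mlx5e_attach_encap() drops
 * encap_tbl_lock around mlx5e_tc_tun_create_header_ipv*(), which performs
 * the route/neigh resolution that builds the encap header, so the table lock
 * is not held across it. The res_ready completion lets concurrent flows that
 * found the same cached entry wait for that initialization to finish and
 * then re-check compl_result under the lock.
 */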

int mlx5e_attach_decap(struct mlx5e_priv *priv,
		       struct mlx5e_tc_flow *flow,
		       struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
	struct mlx5_pkt_reformat_params reformat_params;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5e_decap_entry *d;
	struct mlx5e_decap_key key;
	uintptr_t hash_key;
	int err = 0;

	parse_attr = flow->attr->parse_attr;
	if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "encap header larger than max supported");
		return -EOPNOTSUPP;
	}

	key.key = parse_attr->eth;
	hash_key = hash_decap_info(&key);
	mutex_lock(&esw->offloads.decap_tbl_lock);
	d = mlx5e_decap_get(priv, &key, hash_key);
	if (d) {
		mutex_unlock(&esw->offloads.decap_tbl_lock);
		wait_for_completion(&d->res_ready);
		mutex_lock(&esw->offloads.decap_tbl_lock);
		if (d->compl_result) {
			err = -EREMOTEIO;
			goto out_free;
		}
		goto found;
	}

	d = kzalloc(sizeof(*d), GFP_KERNEL);
	if (!d) {
		err = -ENOMEM;
		goto out_err;
	}

	d->key = key;
	refcount_set(&d->refcnt, 1);
	init_completion(&d->res_ready);
	INIT_LIST_HEAD(&d->flows);
	hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
	mutex_unlock(&esw->offloads.decap_tbl_lock);

	memset(&reformat_params, 0, sizeof(reformat_params));
	reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
	reformat_params.size = sizeof(parse_attr->eth);
	reformat_params.data = &parse_attr->eth;
	d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
						     &reformat_params,
						     MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(d->pkt_reformat)) {
		err = PTR_ERR(d->pkt_reformat);
		d->compl_result = err;
	}
	mutex_lock(&esw->offloads.decap_tbl_lock);
	complete_all(&d->res_ready);
	if (err)
		goto out_free;

found:
	flow->decap_reformat = d;
	attr->decap_pkt_reformat = d->pkt_reformat;
	list_add(&flow->l3_to_l2_reformat, &d->flows);
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	return 0;

out_free:
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	mlx5e_decap_put(priv, d);
	return err;

out_err:
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	return err;
}
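
/* Illustrative note (not part of the driver): decap entries are keyed by the
 * rebuilt inner Ethernet header (parse_attr->eth), so flows that restore an
 * identical L2 header after L3 tunnel decapsulation share a single
 * MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2 reformat object.
 */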

static int cmp_route_info(struct mlx5e_route_key *a,
			  struct mlx5e_route_key *b)
{
	if (a->ip_version == 4 && b->ip_version == 4)
		return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
			      sizeof(a->endpoint_ip.v4));
	else if (a->ip_version == 6 && b->ip_version == 6)
		return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
			      sizeof(a->endpoint_ip.v6));
	return 1;
}

static u32 hash_route_info(struct mlx5e_route_key *key)
{
	if (key->ip_version == 4)
		return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
	return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
}

static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
				struct mlx5e_route_entry *r)
{
	WARN_ON(!list_empty(&r->decap_flows));
	WARN_ON(!list_empty(&r->encap_entries));

	kfree_rcu(r, rcu);
}

static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
		return;

	hash_del_rcu(&r->hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_route_dealloc(priv, r);
}

static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	lockdep_assert_held(&esw->offloads.encap_tbl_lock);

	if (!refcount_dec_and_test(&r->refcnt))
		return;
	hash_del_rcu(&r->hlist);
	mlx5e_route_dealloc(priv, r);
}

static struct mlx5e_route_entry *
mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
		u32 hash_key)
{
	struct mlx5e_route_key r_key;
	struct mlx5e_route_entry *r;

	hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
		r_key = r->key;
		if (!cmp_route_info(&r_key, key) &&
		    refcount_inc_not_zero(&r->refcnt))
			return r;
	}
	return NULL;
}

static struct mlx5e_route_entry *
mlx5e_route_get_create(struct mlx5e_priv *priv,
		       struct mlx5e_route_key *key,
		       int tunnel_dev_index,
		       unsigned long *route_tbl_change_time)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct mlx5e_tc_tun_encap *encap;
	struct mlx5e_route_entry *r;
	u32 hash_key;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;
	encap = uplink_priv->encap;

	hash_key = hash_route_info(key);
	spin_lock_bh(&encap->route_lock);
	r = mlx5e_route_get(encap, key, hash_key);
	spin_unlock_bh(&encap->route_lock);
	if (r) {
		if (!mlx5e_route_entry_valid(r)) {
			mlx5e_route_put_locked(priv, r);
			return ERR_PTR(-EINVAL);
		}
		return r;
	}

	r = kzalloc(sizeof(*r), GFP_KERNEL);
	if (!r)
		return ERR_PTR(-ENOMEM);

	r->key = *key;
	r->flags |= MLX5E_ROUTE_ENTRY_VALID;
	r->tunnel_dev_index = tunnel_dev_index;
	refcount_set(&r->refcnt, 1);
	INIT_LIST_HEAD(&r->decap_flows);
	INIT_LIST_HEAD(&r->encap_entries);

	spin_lock_bh(&encap->route_lock);
	*route_tbl_change_time = encap->route_tbl_last_update;
	hash_add(encap->route_tbl, &r->hlist, hash_key);
	spin_unlock_bh(&encap->route_lock);

	return r;
}
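
/* Illustrative note (not part of the driver): mlx5e_route_get_create() runs
 * with encap_tbl_lock held by its callers, which is why its failure path can
 * use mlx5e_route_put_locked(). The route_lock spinlock only serializes
 * route_tbl itself against the FIB notifier, which may run in atomic
 * context.
 */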

static struct mlx5e_route_entry *
mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
{
	u32 hash_key = hash_route_info(key);
	struct mlx5e_route_entry *r;

	spin_lock_bh(&encap->route_lock);
	encap->route_tbl_last_update = jiffies;
	r = mlx5e_route_get(encap, key, hash_key);
	spin_unlock_bh(&encap->route_lock);

	return r;
}

struct mlx5e_tc_fib_event_data {
	struct work_struct work;
	unsigned long event;
	struct mlx5e_route_entry *r;
	struct net_device *ul_dev;
};

static void mlx5e_tc_fib_event_work(struct work_struct *work);
static struct mlx5e_tc_fib_event_data *
mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
{
	struct mlx5e_tc_fib_event_data *fib_work;

	fib_work = kzalloc(sizeof(*fib_work), flags);
	if (WARN_ON(!fib_work))
		return NULL;

	INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
	fib_work->event = event;
	fib_work->ul_dev = ul_dev;

	return fib_work;
}

static int
mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
			   struct mlx5e_route_entry *r,
			   unsigned long event)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_fib_event_data *fib_work;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct net_device *ul_dev;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	ul_dev = uplink_rpriv->netdev;

	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
	if (!fib_work)
		return -ENOMEM;

	dev_hold(ul_dev);
	refcount_inc(&r->refcnt);
	fib_work->r = r;
	queue_work(priv->wq, &fib_work->work);

	return 0;
}
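
/* Illustrative note (not part of the driver): each queued FIB work item owns
 * one reference on the route entry and one on the uplink netdev; both are
 * dropped at the end of mlx5e_tc_fib_event_work(), so the entry and the
 * device stay alive until the deferred update has run.
 */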

int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
			     struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	unsigned long tbl_time_before, tbl_time_after;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	int err = 0;

	esw_attr = attr->esw_attr;
	parse_attr = attr->parse_attr;
	mutex_lock(&esw->offloads.encap_tbl_lock);
	if (!esw_attr->rx_tun_attr)
		goto out;

	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
	tbl_time_after = tbl_time_before;
	err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr, parse_attr->filter_dev);
	if (err || !esw_attr->rx_tun_attr->decap_vport)
		goto out;

	key.ip_version = attr->tun_ip_version;
	if (key.ip_version == 4)
		key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
	else
		key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;

	r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
				   &tbl_time_after);
	if (IS_ERR(r)) {
		err = PTR_ERR(r);
		goto out;
	}
	/* Routing changed concurrently. FIB event handler might have missed new
	 * entry, schedule update.
	 */
	if (tbl_time_before != tbl_time_after) {
		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
		if (err) {
			mlx5e_route_put_locked(priv, r);
			goto out;
		}
	}

	flow->decap_route = r;
	list_add(&flow->decap_routes, &r->decap_flows);
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	return 0;

out:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	return err;
}

static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
				    struct mlx5e_tc_flow *flow,
				    struct mlx5e_encap_entry *e,
				    bool new_encap_entry,
				    unsigned long tbl_time_before,
				    int out_index)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	unsigned long tbl_time_after = tbl_time_before;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	const struct ip_tunnel_info *tun_info;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	unsigned short family;
	int err = 0;

	esw_attr = attr->esw_attr;
	parse_attr = attr->parse_attr;
	tun_info = parse_attr->tun_info[out_index];
	family = ip_tunnel_info_af(tun_info);

	if (family == AF_INET) {
		key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
		key.ip_version = 4;
	} else if (family == AF_INET6) {
		key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
		key.ip_version = 6;
	}

	err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
				  e->route_dev_ifindex, out_index);
	if (err || !(esw_attr->dests[out_index].flags &
		     MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
		return err;

	r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
				   &tbl_time_after);
	if (IS_ERR(r))
		return PTR_ERR(r);
	/* Routing changed concurrently. FIB event handler might have missed new
	 * entry, schedule update.
	 */
	if (tbl_time_before != tbl_time_after) {
		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
		if (err) {
			mlx5e_route_put_locked(priv, r);
			return err;
		}
	}

	flow->encap_routes[out_index].r = r;
	if (new_encap_entry)
		list_add(&e->route_list, &r->encap_entries);
	flow->encap_routes[out_index].index = out_index;
	return 0;
}

void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
			      struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_route_entry *r = flow->decap_route;

	if (!r)
		return;

	mutex_lock(&esw->offloads.encap_tbl_lock);
	list_del(&flow->decap_routes);
	flow->decap_route = NULL;

	if (!refcount_dec_and_test(&r->refcnt)) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		return;
	}
	hash_del_rcu(&r->hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_route_dealloc(priv, r);
}

static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
				     struct mlx5e_tc_flow *flow,
				     int out_index)
{
	struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_encap_entry *e, *tmp;

	if (!r)
		return;

	mutex_lock(&esw->offloads.encap_tbl_lock);
	flow->encap_routes[out_index].r = NULL;

	if (!refcount_dec_and_test(&r->refcnt)) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		return;
	}
	list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
		list_del_init(&e->route_list);
	hash_del_rcu(&r->hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_route_dealloc(priv, r);
}

static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
				   struct mlx5e_encap_entry *e,
				   struct list_head *encap_flows)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, encap_flows, tmp_list) {
		struct mlx5_flow_attr *attr = flow->attr;
		struct mlx5_esw_flow_attr *esw_attr;

		if (!mlx5e_is_offloaded_flow(flow))
			continue;
		esw_attr = attr->esw_attr;

		if (flow_flag_test(flow, SLOW))
			mlx5e_tc_unoffload_from_slow_path(esw, flow);
		else
			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
		mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr);
		attr->modify_hdr = NULL;

		esw_attr->dests[flow->tmp_entry_index].flags &=
			~MLX5_ESW_DEST_ENCAP_VALID;
		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
	}

	e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
		e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
		mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
		e->pkt_reformat = NULL;
	}
}
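
/* Illustrative note (not part of the driver): on a route change,
 * mlx5e_invalidate_encap() above tears down the hardware state of every flow
 * using the stale encap, and mlx5e_reoffload_encap() below rebuilds the
 * header against the new route and re-offloads each flow, falling back to
 * the slow path when any destination encap is still unresolved.
 */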

static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
				  struct net_device *tunnel_dev,
				  struct mlx5e_encap_entry *e,
				  struct list_head *encap_flows)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;
	int err;

	err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
		mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
		mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
	if (err)
		mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err);
	e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;

	list_for_each_entry(flow, encap_flows, tmp_list) {
		struct mlx5e_tc_flow_parse_attr *parse_attr;
		struct mlx5_flow_attr *attr = flow->attr;
		struct mlx5_esw_flow_attr *esw_attr;
		struct mlx5_flow_handle *rule;
		struct mlx5_flow_spec *spec;

		if (flow_flag_test(flow, FAILED))
			continue;

		esw_attr = attr->esw_attr;
		parse_attr = attr->parse_attr;
		spec = &parse_attr->spec;

		err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
					     e->out_dev, e->route_dev_ifindex,
					     flow->tmp_entry_index);
		if (err) {
			mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err);
			continue;
		}

		err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow);
		if (err) {
			mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
				       err);
			continue;
		}

		if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
			esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
			esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
			if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
				goto offload_to_slow_path;
			/* update from slow path rule to encap rule */
			rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
			if (IS_ERR(rule)) {
				err = PTR_ERR(rule);
				mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
					       err);
			} else {
				flow->rule[0] = rule;
			}
		} else {
offload_to_slow_path:
			rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
			/* mark the flow's encap dest as non-valid */
			esw_attr->dests[flow->tmp_entry_index].flags &=
				~MLX5_ESW_DEST_ENCAP_VALID;

			if (IS_ERR(rule)) {
				err = PTR_ERR(rule);
				mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
					       err);
			} else {
				flow->rule[0] = rule;
			}
		}
		flow_flag_set(flow, OFFLOADED);
	}
}

static int mlx5e_update_route_encaps(struct mlx5e_priv *priv,
				     struct mlx5e_route_entry *r,
				     struct list_head *flow_list,
				     bool replace)
{
	struct net_device *tunnel_dev;
	struct mlx5e_encap_entry *e;

	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
	if (!tunnel_dev)
		return -ENODEV;

	list_for_each_entry(e, &r->encap_entries, route_list) {
		LIST_HEAD(encap_flows);

		mlx5e_take_all_encap_flows(e, &encap_flows);
		if (list_empty(&encap_flows))
			continue;

		if (mlx5e_route_entry_valid(r))
			mlx5e_invalidate_encap(priv, e, &encap_flows);

		if (!replace) {
			list_splice(&encap_flows, flow_list);
			continue;
		}

		mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows);
		list_splice(&encap_flows, flow_list);
	}

	return 0;
}

static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv,
				      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, flow_list, tmp_list)
		if (mlx5e_is_offloaded_flow(flow))
			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
}

static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
				  struct list_head *decap_flows)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, decap_flows, tmp_list) {
		struct mlx5e_tc_flow_parse_attr *parse_attr;
		struct mlx5_flow_attr *attr = flow->attr;
		struct mlx5_flow_handle *rule;
		struct mlx5_flow_spec *spec;
		int err;

		if (flow_flag_test(flow, FAILED))
			continue;

		parse_attr = attr->parse_attr;
		spec = &parse_attr->spec;
		err = mlx5e_tc_tun_route_lookup(priv, spec, attr, parse_attr->filter_dev);
		if (err) {
			mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
				       err);
			continue;
		}

		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
				       err);
		} else {
			flow->rule[0] = rule;
			flow_flag_set(flow, OFFLOADED);
		}
	}
}

static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
					  struct mlx5e_route_entry *r,
					  struct list_head *flow_list,
					  bool replace)
{
	struct net_device *tunnel_dev;
	LIST_HEAD(decap_flows);

	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
	if (!tunnel_dev)
		return -ENODEV;

	mlx5e_take_all_route_decap_flows(r, &decap_flows);
	if (mlx5e_route_entry_valid(r))
		mlx5e_unoffload_flow_list(priv, &decap_flows);
	if (replace)
		mlx5e_reoffload_decap(priv, &decap_flows);

	list_splice(&decap_flows, flow_list);

	return 0;
}

static void mlx5e_tc_fib_event_work(struct work_struct *work)
{
	struct mlx5e_tc_fib_event_data *event_data =
		container_of(work, struct mlx5e_tc_fib_event_data, work);
	struct net_device *ul_dev = event_data->ul_dev;
	struct mlx5e_priv *priv = netdev_priv(ul_dev);
	struct mlx5e_route_entry *r = event_data->r;
	struct mlx5_eswitch *esw;
	LIST_HEAD(flow_list);
	bool replace;
	int err;

	/* sync with concurrent neigh updates */
	rtnl_lock();
	esw = priv->mdev->priv.eswitch;
	mutex_lock(&esw->offloads.encap_tbl_lock);
	replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;

	if (!mlx5e_route_entry_valid(r) && !replace)
		goto out;

	err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
	if (err)
		mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
			       err);

	err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
	if (err)
		mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
			       err);

	if (replace)
		r->flags |= MLX5E_ROUTE_ENTRY_VALID;
out:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	rtnl_unlock();

	mlx5e_put_flow_list(priv, &flow_list);
	mlx5e_route_put(priv, event_data->r);
	dev_put(event_data->ul_dev);
	kfree(event_data);
}
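
/* Illustrative note (not part of the driver): the FIB work above takes the
 * RTNL lock before encap_tbl_lock to synchronize with concurrent neigh
 * updates. On FIB_EVENT_ENTRY_REPLACE the affected flows are re-offloaded
 * against the new route and the entry is marked valid again; on
 * FIB_EVENT_ENTRY_DEL the flows are only unoffloaded and their references
 * released.
 */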

static struct mlx5e_tc_fib_event_data *
mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
			 struct net_device *ul_dev,
			 struct mlx5e_tc_tun_encap *encap,
			 unsigned long event,
			 struct fib_notifier_info *info)
{
	struct fib_entry_notifier_info *fen_info;
	struct mlx5e_tc_fib_event_data *fib_work;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	struct net_device *fib_dev;

	fen_info = container_of(info, struct fib_entry_notifier_info, info);
	if (fen_info->fi->nh)
		return NULL;
	fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
	if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
	    fen_info->dst_len != 32)
		return NULL;

	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
	if (!fib_work)
		return ERR_PTR(-ENOMEM);

	key.endpoint_ip.v4 = htonl(fen_info->dst);
	key.ip_version = 4;

	/* Can't fail after this point because releasing reference to r
	 * requires obtaining sleeping mutex which we can't do in atomic
	 * context.
	 */
	r = mlx5e_route_lookup_for_update(encap, &key);
	if (!r)
		goto out;
	fib_work->r = r;
	dev_hold(ul_dev);

	return fib_work;

out:
	kfree(fib_work);
	return NULL;
}

static struct mlx5e_tc_fib_event_data *
mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
			 struct net_device *ul_dev,
			 struct mlx5e_tc_tun_encap *encap,
			 unsigned long event,
			 struct fib_notifier_info *info)
{
	struct fib6_entry_notifier_info *fen_info;
	struct mlx5e_tc_fib_event_data *fib_work;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	struct net_device *fib_dev;

	fen_info = container_of(info, struct fib6_entry_notifier_info, info);
	fib_dev = fib6_info_nh_dev(fen_info->rt);
	if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
	    fen_info->rt->fib6_dst.plen != 128)
		return NULL;

	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
	if (!fib_work)
		return ERR_PTR(-ENOMEM);

	memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
	       sizeof(fen_info->rt->fib6_dst.addr));
	key.ip_version = 6;

	/* Can't fail after this point because releasing reference to r
	 * requires obtaining sleeping mutex which we can't do in atomic
	 * context.
	 */
	r = mlx5e_route_lookup_for_update(encap, &key);
	if (!r)
		goto out;
	fib_work->r = r;
	dev_hold(ul_dev);

	return fib_work;

out:
	kfree(fib_work);
	return NULL;
}

static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
{
	struct mlx5e_tc_fib_event_data *fib_work;
	struct fib_notifier_info *info = ptr;
	struct mlx5e_tc_tun_encap *encap;
	struct net_device *ul_dev;
	struct mlx5e_priv *priv;

	encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
	priv = encap->priv;
	ul_dev = priv->netdev;
	priv = netdev_priv(ul_dev);

	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		if (info->family == AF_INET)
			fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
		else if (info->family == AF_INET6)
			fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
		else
			return NOTIFY_DONE;

		if (!IS_ERR_OR_NULL(fib_work)) {
			queue_work(priv->wq, &fib_work->work);
		} else if (IS_ERR(fib_work)) {
			NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
			mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
				       PTR_ERR(fib_work));
		}

		break;
	default:
		return NOTIFY_DONE;
	}

	return NOTIFY_DONE;
}

struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_tun_encap *encap;
	int err;

	encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
	if (!encap)
		return ERR_PTR(-ENOMEM);

	encap->priv = priv;
	encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
	spin_lock_init(&encap->route_lock);
	hash_init(encap->route_tbl);
	err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
				    NULL, NULL);
	if (err) {
		kvfree(encap);
		return ERR_PTR(err);
	}

	return encap;
}

void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
{
	if (!encap)
		return;

	unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
	flush_workqueue(encap->priv->wq); /* flush fib event works */
	kvfree(encap);
}
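
/* Illustrative sketch (not part of the driver): a caller such as the uplink
 * representor would pair the two entry points above across its lifetime,
 * e.g.:
 *
 *	uplink_priv->encap = mlx5e_tc_tun_init(priv);
 *	if (IS_ERR(uplink_priv->encap))
 *		return PTR_ERR(uplink_priv->encap);
 *	...
 *	mlx5e_tc_tun_cleanup(uplink_priv->encap);
 *
 * mlx5e_tc_tun_cleanup() is a no-op for a NULL pointer and otherwise
 * unregisters the FIB notifier before flushing any queued FIB works.
 */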