// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021 Mellanox Technologies. */

#include <net/fib_notifier.h>
#include "tc_tun_encap.h"
#include "en_tc.h"
#include "tc_tun.h"
#include "rep/tc.h"
#include "diag/en_tc_tracepoint.h"

enum {
	MLX5E_ROUTE_ENTRY_VALID = BIT(0),
};

struct mlx5e_route_key {
	int ip_version;
	union {
		__be32 v4;
		struct in6_addr v6;
	} endpoint_ip;
};

struct mlx5e_route_entry {
	struct mlx5e_route_key key;
	struct list_head encap_entries;
	struct list_head decap_flows;
	u32 flags;
	struct hlist_node hlist;
	refcount_t refcnt;
	int tunnel_dev_index;
	struct rcu_head rcu;
};

struct mlx5e_tc_tun_encap {
	struct mlx5e_priv *priv;
	struct notifier_block fib_nb;
	spinlock_t route_lock; /* protects route_tbl */
	unsigned long route_tbl_last_update;
	DECLARE_HASHTABLE(route_tbl, 8);
};

static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
{
	return r->flags & MLX5E_ROUTE_ENTRY_VALID;
}

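/* Locking overview, summarized from the code below: membership of
 * route_tbl and the route_tbl_last_update timestamp are protected by the
 * route_lock spinlock so they can be accessed from the atomic FIB
 * notifier, while entry lifetime (refcounts, encap_entries, decap_flows)
 * is protected by esw->offloads.encap_tbl_lock, a sleeping mutex taken
 * from workqueue or rtnl context.
 */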
int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
			     struct mlx5_flow_spec *spec)
{
	struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
	struct mlx5_rx_tun_attr *tun_attr;
	void *daddr, *saddr;
	u8 ip_version;

	tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
	if (!tun_attr)
		return -ENOMEM;

	esw_attr->rx_tun_attr = tun_attr;
	ip_version = mlx5e_tc_get_ip_version(spec, true);

	if (ip_version == 4) {
		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
		tun_attr->dst_ip.v4 = *(__be32 *)daddr;
		tun_attr->src_ip.v4 = *(__be32 *)saddr;
		if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
			return 0;
	}
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
	else if (ip_version == 6) {
		int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
		struct in6_addr zerov6 = {};

		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
		memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
		memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
		if (!memcmp(&tun_attr->dst_ip.v6, &zerov6, sizeof(zerov6)) ||
		    !memcmp(&tun_attr->src_ip.v6, &zerov6, sizeof(zerov6)))
			return 0;
	}
#endif
	/* Only set the flag if both src and dst ip addresses exist. They are
	 * required to establish routing.
	 */
	flow_flag_set(flow, TUN_RX);
	return 0;
}

static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
{
	bool all_flow_encaps_valid = true;
	int i;

	/* Flow can be associated with multiple encap entries.
	 * Before offloading the flow verify that all of them have
	 * a valid neighbour.
	 */
	for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
			continue;
		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
			all_flow_encaps_valid = false;
			break;
		}
	}

	return all_flow_encaps_valid;
}

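/* The two handlers below move flows between the slow path (miss to
 * software) and the fully offloaded encap rule. mlx5e_tc_encap_flows_add()
 * runs once a neighbour becomes valid: it offloads the cached
 * encapsulation header and, for each flow whose encap destinations are all
 * valid, replaces the slow path rule with an encap rule.
 * mlx5e_tc_encap_flows_del() performs the reverse transition when the
 * neighbour is lost.
 */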
void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
			      struct mlx5e_encap_entry *e,
			      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_attr *attr;
	struct mlx5_flow_spec *spec;
	struct mlx5e_tc_flow *flow;
	int err;

	if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
		return;

	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
						     e->reformat_type,
						     e->encap_size, e->encap_header,
						     MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(e->pkt_reformat)) {
		mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
			       PTR_ERR(e->pkt_reformat));
		return;
	}
	e->flags |= MLX5_ENCAP_ENTRY_VALID;
	mlx5e_rep_queue_neigh_stats_work(priv);

	list_for_each_entry(flow, flow_list, tmp_list) {
		if (!mlx5e_is_offloaded_flow(flow))
			continue;
		attr = flow->attr;
		esw_attr = attr->esw_attr;
		spec = &attr->parse_attr->spec;

		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
		esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;

		/* Do not offload flows with unresolved neighbors */
		if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
			continue;
		/* update from slow path rule to encap rule */
		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
				       err);
			continue;
		}

		mlx5e_tc_unoffload_from_slow_path(esw, flow);
		flow->rule[0] = rule;
		/* was unset when slow path rule removed */
		flow_flag_set(flow, OFFLOADED);
	}
}

void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
			      struct mlx5e_encap_entry *e,
			      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_attr *attr;
	struct mlx5_flow_spec *spec;
	struct mlx5e_tc_flow *flow;
	int err;

	list_for_each_entry(flow, flow_list, tmp_list) {
		if (!mlx5e_is_offloaded_flow(flow))
			continue;
		attr = flow->attr;
		esw_attr = attr->esw_attr;
		spec = &attr->parse_attr->spec;

		/* update from encap rule to slow path rule */
		rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
		/* mark the flow's encap dest as non-valid */
		esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;

		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
				       err);
			continue;
		}

		mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
		flow->rule[0] = rule;
		/* was unset when fast path rule removed */
		flow_flag_set(flow, OFFLOADED);
	}

	/* we know that the encap is valid */
	e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
	mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
}

static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
				struct list_head *flow_list,
				int index)
{
	if (IS_ERR(mlx5e_flow_get(flow)))
		return;
	wait_for_completion(&flow->init_done);

	flow->tmp_entry_index = index;
	list_add(&flow->tmp_list, flow_list);
}

/* Takes reference to all flows attached to encap and adds the flows to
 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
 */
void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
{
	struct encap_flow_item *efi;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(efi, &e->flows, list) {
		flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
		mlx5e_take_tmp_flow(flow, flow_list, efi->index);
	}
}

/* Takes reference to all flows attached to route and adds the flows to
 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
 */
static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
					     struct list_head *flow_list)
{
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, &r->decap_flows, decap_routes)
		mlx5e_take_tmp_flow(flow, flow_list, 0);
}

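/* Walk nhe->encap_list under RCU and return the next entry whose refcount
 * could be taken, dropping the reference on the entry the caller passed
 * in. A rough caller-side sketch ('done_early' is illustrative, not a real
 * flag):
 *
 *	struct mlx5e_encap_entry *e = NULL;
 *
 *	while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
 *		...
 *		if (done_early) {
 *			mlx5e_encap_put(netdev_priv(e->out_dev), e);
 *			break;
 *		}
 *	}
 */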
static struct mlx5e_encap_entry *
mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
			   struct mlx5e_encap_entry *e)
{
	struct mlx5e_encap_entry *next = NULL;

retry:
	rcu_read_lock();

	/* find encap with non-zero reference counter value */
	for (next = e ?
		     list_next_or_null_rcu(&nhe->encap_list,
					   &e->encap_list,
					   struct mlx5e_encap_entry,
					   encap_list) :
		     list_first_or_null_rcu(&nhe->encap_list,
					    struct mlx5e_encap_entry,
					    encap_list);
	     next;
	     next = list_next_or_null_rcu(&nhe->encap_list,
					  &next->encap_list,
					  struct mlx5e_encap_entry,
					  encap_list))
		if (mlx5e_encap_take(next))
			break;

	rcu_read_unlock();

	/* release starting encap */
	if (e)
		mlx5e_encap_put(netdev_priv(e->out_dev), e);
	if (!next)
		return next;

	/* wait for encap to be fully initialized */
	wait_for_completion(&next->res_ready);
	/* continue searching if encap entry is not in valid state after completion */
	if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) {
		e = next;
		goto retry;
	}

	return next;
}

void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
	struct mlx5e_encap_entry *e = NULL;
	struct mlx5e_tc_flow *flow;
	struct mlx5_fc *counter;
	struct neigh_table *tbl;
	bool neigh_used = false;
	struct neighbour *n;
	u64 lastuse;

	if (m_neigh->family == AF_INET)
		tbl = &arp_tbl;
#if IS_ENABLED(CONFIG_IPV6)
	else if (m_neigh->family == AF_INET6)
		tbl = ipv6_stub->nd_tbl;
#endif
	else
		return;

	/* mlx5e_get_next_valid_encap() releases previous encap before returning
	 * next one.
	 */
	while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
		struct mlx5e_priv *priv = netdev_priv(e->out_dev);
		struct encap_flow_item *efi, *tmp;
		struct mlx5_eswitch *esw;
		LIST_HEAD(flow_list);

		esw = priv->mdev->priv.eswitch;
		mutex_lock(&esw->offloads.encap_tbl_lock);
		list_for_each_entry_safe(efi, tmp, &e->flows, list) {
			flow = container_of(efi, struct mlx5e_tc_flow,
					    encaps[efi->index]);
			if (IS_ERR(mlx5e_flow_get(flow)))
				continue;
			list_add(&flow->tmp_list, &flow_list);

			if (mlx5e_is_offloaded_flow(flow)) {
				counter = mlx5e_tc_get_counter(flow);
				lastuse = mlx5_fc_query_lastuse(counter);
				if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
					neigh_used = true;
					break;
				}
			}
		}
		mutex_unlock(&esw->offloads.encap_tbl_lock);

		mlx5e_put_flow_list(priv, &flow_list);
		if (neigh_used) {
			/* release current encap before breaking the loop */
			mlx5e_encap_put(priv, e);
			break;
		}
	}

	trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);

	if (neigh_used) {
		nhe->reported_lastuse = jiffies;

		/* find the relevant neigh according to the cached device and
		 * dst ip pair
		 */
		n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
		if (!n)
			return;

		neigh_event_send(n, NULL);
		neigh_release(n);
	}
}

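/* res_ready/compl_result protocol, as used by the dealloc helpers below:
 * a creator publishes the entry in the hash table first, initializes it,
 * then signals res_ready. compl_result > 0 means an encap entry was fully
 * attached (so dealloc must detach it), a negative value records the
 * creation error for concurrent waiters, and 0 marks a successfully
 * created decap entry.
 */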
static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
	WARN_ON(!list_empty(&e->flows));

	if (e->compl_result > 0) {
		mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);

		if (e->flags & MLX5_ENCAP_ENTRY_VALID)
			mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
	}

	kfree(e->tun_info);
	kfree(e->encap_header);
	kfree_rcu(e, rcu);
}

static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
				struct mlx5e_decap_entry *d)
{
	WARN_ON(!list_empty(&d->flows));

	if (!d->compl_result)
		mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);

	kfree_rcu(d, rcu);
}

void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
		return;
	list_del(&e->route_list);
	hash_del_rcu(&e->encap_hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_encap_dealloc(priv, e);
}

static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
		return;
	hash_del_rcu(&d->hlist);
	mutex_unlock(&esw->offloads.decap_tbl_lock);

	mlx5e_decap_dealloc(priv, d);
}

static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
				     struct mlx5e_tc_flow *flow,
				     int out_index);

void mlx5e_detach_encap(struct mlx5e_priv *priv,
			struct mlx5e_tc_flow *flow, int out_index)
{
	struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (flow->attr->esw_attr->dests[out_index].flags &
	    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
		mlx5e_detach_encap_route(priv, flow, out_index);

	/* flow wasn't fully initialized */
	if (!e)
		return;

	mutex_lock(&esw->offloads.encap_tbl_lock);
	list_del(&flow->encaps[out_index].list);
	flow->encaps[out_index].e = NULL;
	if (!refcount_dec_and_test(&e->refcnt)) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		return;
	}
	list_del(&e->route_list);
	hash_del_rcu(&e->encap_hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_encap_dealloc(priv, e);
}

void mlx5e_detach_decap(struct mlx5e_priv *priv,
			struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_decap_entry *d = flow->decap_reformat;

	if (!d)
		return;

	mutex_lock(&esw->offloads.decap_tbl_lock);
	list_del(&flow->l3_to_l2_reformat);
	flow->decap_reformat = NULL;

	if (!refcount_dec_and_test(&d->refcnt)) {
		mutex_unlock(&esw->offloads.decap_tbl_lock);
		return;
	}
	hash_del_rcu(&d->hlist);
	mutex_unlock(&esw->offloads.decap_tbl_lock);

	mlx5e_decap_dealloc(priv, d);
}

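/* Encap entries are deduplicated on {ip_tunnel_key, tunnel type} and decap
 * entries on the rebuilt inner ethernet header, both hashed with jhash over
 * the raw key bytes. The cmp_*() helpers follow memcmp() conventions and
 * return nonzero on mismatch.
 */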
struct encap_key {
	const struct ip_tunnel_key *ip_tun_key;
	struct mlx5e_tc_tunnel *tc_tunnel;
};

static int cmp_encap_info(struct encap_key *a,
			  struct encap_key *b)
{
	return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) ||
	       a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type;
}

static int cmp_decap_info(struct mlx5e_decap_key *a,
			  struct mlx5e_decap_key *b)
{
	return memcmp(&a->key, &b->key, sizeof(b->key));
}

static int hash_encap_info(struct encap_key *key)
{
	return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
		     key->tc_tunnel->tunnel_type);
}

static int hash_decap_info(struct mlx5e_decap_key *key)
{
	return jhash(&key->key, sizeof(key->key), 0);
}

bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
{
	return refcount_inc_not_zero(&e->refcnt);
}

static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
{
	return refcount_inc_not_zero(&e->refcnt);
}

static struct mlx5e_encap_entry *
mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key,
		uintptr_t hash_key)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_encap_entry *e;
	struct encap_key e_key;

	hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
				   encap_hlist, hash_key) {
		e_key.ip_tun_key = &e->tun_info->key;
		e_key.tc_tunnel = e->tunnel;
		if (!cmp_encap_info(&e_key, key) &&
		    mlx5e_encap_take(e))
			return e;
	}

	return NULL;
}

static struct mlx5e_decap_entry *
mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
		uintptr_t hash_key)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_decap_key r_key;
	struct mlx5e_decap_entry *e;

	hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
				   hlist, hash_key) {
		r_key = e->key;
		if (!cmp_decap_info(&r_key, key) &&
		    mlx5e_decap_take(e))
			return e;
	}
	return NULL;
}

struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
{
	size_t tun_size = sizeof(*tun_info) + tun_info->options_len;

	return kmemdup(tun_info, tun_size, GFP_KERNEL);
}

static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
				      struct mlx5e_tc_flow *flow,
				      int out_index,
				      struct mlx5e_encap_entry *e,
				      struct netlink_ext_ack *extack)
{
	int i;

	for (i = 0; i < out_index; i++) {
		if (flow->encaps[i].e != e)
			continue;
		NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
		netdev_err(priv->netdev, "can't duplicate encap action\n");
		return true;
	}

	return false;
}

static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
			       struct mlx5_flow_attr *attr,
			       struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
			       struct net_device *out_dev,
			       int route_dev_ifindex,
			       int out_index)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
	struct net_device *route_dev;
	u16 vport_num;
	int err = 0;
	u32 data;

	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);

	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
		goto out;

	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
	if (err)
		goto out;

	attr->dest_chain = 0;
	attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
	esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
	data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
						       vport_num);
	err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
						   MLX5_FLOW_NAMESPACE_FDB,
						   VPORT_TO_REG, data);
	if (err >= 0) {
		esw_attr->dests[out_index].src_port_rewrite_act_id = err;
		err = 0;
	}

out:
	if (route_dev)
		dev_put(route_dev);
	return err;
}

static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
				  struct mlx5_esw_flow_attr *attr,
				  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
				  struct net_device *out_dev,
				  int route_dev_ifindex,
				  int out_index)
{
	int act_id = attr->dests[out_index].src_port_rewrite_act_id;
	struct net_device *route_dev;
	u16 vport_num;
	int err = 0;
	u32 data;

	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);

	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
		err = -ENODEV;
		goto out;
	}

	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
	if (err)
		goto out;

	data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
						       vport_num);
	mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);

out:
	if (route_dev)
		dev_put(route_dev);
	return err;
}

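/* route_tbl_last_update is a jiffies timestamp bumped by the FIB event
 * handler. The attach paths sample it before publishing a new entry and
 * compare it again afterwards; if it moved, a FIB change may have raced
 * with entry creation and a FIB_EVENT_ENTRY_REPLACE replay is scheduled
 * (see mlx5e_attach_decap_route() and mlx5e_attach_encap_route()).
 */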
static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct mlx5e_tc_tun_encap *encap;
	unsigned int ret;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;
	encap = uplink_priv->encap;

	spin_lock_bh(&encap->route_lock);
	ret = encap->route_tbl_last_update;
	spin_unlock_bh(&encap->route_lock);
	return ret;
}

static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
				    struct mlx5e_tc_flow *flow,
				    struct mlx5e_encap_entry *e,
				    bool new_encap_entry,
				    unsigned long tbl_time_before,
				    int out_index);

int mlx5e_attach_encap(struct mlx5e_priv *priv,
		       struct mlx5e_tc_flow *flow,
		       struct net_device *mirred_dev,
		       int out_index,
		       struct netlink_ext_ack *extack,
		       struct net_device **encap_dev,
		       bool *encap_valid)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	const struct ip_tunnel_info *tun_info;
	unsigned long tbl_time_before = 0;
	struct encap_key key;
	struct mlx5e_encap_entry *e;
	bool entry_created = false;
	unsigned short family;
	uintptr_t hash_key;
	int err = 0;

	parse_attr = attr->parse_attr;
	tun_info = parse_attr->tun_info[out_index];
	family = ip_tunnel_info_af(tun_info);
	key.ip_tun_key = &tun_info->key;
	key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
	if (!key.tc_tunnel) {
		NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
		return -EOPNOTSUPP;
	}

	hash_key = hash_encap_info(&key);

	mutex_lock(&esw->offloads.encap_tbl_lock);
	e = mlx5e_encap_get(priv, &key, hash_key);

	/* must verify if encap is valid or not */
	if (e) {
		/* Check that entry was not already attached to this flow */
		if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
			err = -EOPNOTSUPP;
			goto out_err;
		}

		mutex_unlock(&esw->offloads.encap_tbl_lock);
		wait_for_completion(&e->res_ready);

		/* Protect against concurrent neigh update. */
		mutex_lock(&esw->offloads.encap_tbl_lock);
		if (e->compl_result < 0) {
			err = -EREMOTEIO;
			goto out_err;
		}
		goto attach_flow;
	}

	e = kzalloc(sizeof(*e), GFP_KERNEL);
	if (!e) {
		err = -ENOMEM;
		goto out_err;
	}

	refcount_set(&e->refcnt, 1);
	init_completion(&e->res_ready);
	entry_created = true;
	INIT_LIST_HEAD(&e->route_list);

	tun_info = mlx5e_dup_tun_info(tun_info);
	if (!tun_info) {
		err = -ENOMEM;
		goto out_err_init;
	}
	e->tun_info = tun_info;
	err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
	if (err)
		goto out_err_init;

	INIT_LIST_HEAD(&e->flows);
	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	if (family == AF_INET)
		err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
	else if (family == AF_INET6)
		err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);

	/* Protect against concurrent neigh update. */
	mutex_lock(&esw->offloads.encap_tbl_lock);
	complete_all(&e->res_ready);
	if (err) {
		e->compl_result = err;
		goto out_err;
	}
	e->compl_result = 1;

attach_flow:
	err = mlx5e_attach_encap_route(priv, flow, e, entry_created, tbl_time_before,
				       out_index);
	if (err)
		goto out_err;

	flow->encaps[out_index].e = e;
	list_add(&flow->encaps[out_index].list, &e->flows);
	flow->encaps[out_index].index = out_index;
	*encap_dev = e->out_dev;
	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
		attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
		attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
		*encap_valid = true;
	} else {
		*encap_valid = false;
	}
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	return err;

out_err:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	if (e)
		mlx5e_encap_put(priv, e);
	return err;

out_err_init:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	kfree(tun_info);
	kfree(e);
	return err;
}

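/* A rough caller-side sketch of mlx5e_attach_encap() (simplified, error
 * handling elided; the variable names are illustrative rather than copied
 * from the real caller in en_tc.c):
 *
 *	struct net_device *encap_dev;
 *	bool encap_valid;
 *
 *	err = mlx5e_attach_encap(priv, flow, mirred_dev, out_index,
 *				 extack, &encap_dev, &encap_valid);
 *	if (!err && !encap_valid)
 *		... offload via the slow path until the neighbour resolves ...
 */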
int mlx5e_attach_decap(struct mlx5e_priv *priv,
		       struct mlx5e_tc_flow *flow,
		       struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5e_decap_entry *d;
	struct mlx5e_decap_key key;
	uintptr_t hash_key;
	int err = 0;

	parse_attr = flow->attr->parse_attr;
	if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "encap header larger than max supported");
		return -EOPNOTSUPP;
	}

	key.key = parse_attr->eth;
	hash_key = hash_decap_info(&key);
	mutex_lock(&esw->offloads.decap_tbl_lock);
	d = mlx5e_decap_get(priv, &key, hash_key);
	if (d) {
		mutex_unlock(&esw->offloads.decap_tbl_lock);
		wait_for_completion(&d->res_ready);
		mutex_lock(&esw->offloads.decap_tbl_lock);
		if (d->compl_result) {
			err = -EREMOTEIO;
			goto out_free;
		}
		goto found;
	}

	d = kzalloc(sizeof(*d), GFP_KERNEL);
	if (!d) {
		err = -ENOMEM;
		goto out_err;
	}

	d->key = key;
	refcount_set(&d->refcnt, 1);
	init_completion(&d->res_ready);
	INIT_LIST_HEAD(&d->flows);
	hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
	mutex_unlock(&esw->offloads.decap_tbl_lock);

	d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
						     MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2,
						     sizeof(parse_attr->eth),
						     &parse_attr->eth,
						     MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(d->pkt_reformat)) {
		err = PTR_ERR(d->pkt_reformat);
		d->compl_result = err;
	}
	mutex_lock(&esw->offloads.decap_tbl_lock);
	complete_all(&d->res_ready);
	if (err)
		goto out_free;

found:
	flow->decap_reformat = d;
	attr->decap_pkt_reformat = d->pkt_reformat;
	list_add(&flow->l3_to_l2_reformat, &d->flows);
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	return 0;

out_free:
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	mlx5e_decap_put(priv, d);
	return err;

out_err:
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	return err;
}

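/* Route entries track tunnel endpoint host routes so offloaded encap and
 * decap flows can be updated when routing changes. Keys are compared per
 * IP version; keys of different versions never match.
 */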
static int cmp_route_info(struct mlx5e_route_key *a,
			  struct mlx5e_route_key *b)
{
	if (a->ip_version == 4 && b->ip_version == 4)
		return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
			      sizeof(a->endpoint_ip.v4));
	else if (a->ip_version == 6 && b->ip_version == 6)
		return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
			      sizeof(a->endpoint_ip.v6));
	return 1;
}

static u32 hash_route_info(struct mlx5e_route_key *key)
{
	if (key->ip_version == 4)
		return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
	return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
}

static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
				struct mlx5e_route_entry *r)
{
	WARN_ON(!list_empty(&r->decap_flows));
	WARN_ON(!list_empty(&r->encap_entries));

	kfree_rcu(r, rcu);
}

static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
		return;

	hash_del_rcu(&r->hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_route_dealloc(priv, r);
}

static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	lockdep_assert_held(&esw->offloads.encap_tbl_lock);

	if (!refcount_dec_and_test(&r->refcnt))
		return;
	hash_del_rcu(&r->hlist);
	mlx5e_route_dealloc(priv, r);
}

static struct mlx5e_route_entry *
mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
		u32 hash_key)
{
	struct mlx5e_route_key r_key;
	struct mlx5e_route_entry *r;

	hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
		r_key = r->key;
		if (!cmp_route_info(&r_key, key) &&
		    refcount_inc_not_zero(&r->refcnt))
			return r;
	}
	return NULL;
}

static struct mlx5e_route_entry *
mlx5e_route_get_create(struct mlx5e_priv *priv,
		       struct mlx5e_route_key *key,
		       int tunnel_dev_index,
		       unsigned long *route_tbl_change_time)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct mlx5e_tc_tun_encap *encap;
	struct mlx5e_route_entry *r;
	u32 hash_key;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;
	encap = uplink_priv->encap;

	hash_key = hash_route_info(key);
	spin_lock_bh(&encap->route_lock);
	r = mlx5e_route_get(encap, key, hash_key);
	spin_unlock_bh(&encap->route_lock);
	if (r) {
		if (!mlx5e_route_entry_valid(r)) {
			mlx5e_route_put_locked(priv, r);
			return ERR_PTR(-EINVAL);
		}
		return r;
	}

	r = kzalloc(sizeof(*r), GFP_KERNEL);
	if (!r)
		return ERR_PTR(-ENOMEM);

	r->key = *key;
	r->flags |= MLX5E_ROUTE_ENTRY_VALID;
	r->tunnel_dev_index = tunnel_dev_index;
	refcount_set(&r->refcnt, 1);
	INIT_LIST_HEAD(&r->decap_flows);
	INIT_LIST_HEAD(&r->encap_entries);

	spin_lock_bh(&encap->route_lock);
	*route_tbl_change_time = encap->route_tbl_last_update;
	hash_add(encap->route_tbl, &r->hlist, hash_key);
	spin_unlock_bh(&encap->route_lock);

	return r;
}

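/* Called from the FIB notifier path: bumping route_tbl_last_update under
 * route_lock before the lookup guarantees that an attach racing with this
 * event either finds the entry already in the table or observes the
 * timestamp change and schedules a replay.
 */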
static struct mlx5e_route_entry *
mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
{
	u32 hash_key = hash_route_info(key);
	struct mlx5e_route_entry *r;

	spin_lock_bh(&encap->route_lock);
	encap->route_tbl_last_update = jiffies;
	r = mlx5e_route_get(encap, key, hash_key);
	spin_unlock_bh(&encap->route_lock);

	return r;
}

struct mlx5e_tc_fib_event_data {
	struct work_struct work;
	unsigned long event;
	struct mlx5e_route_entry *r;
	struct net_device *ul_dev;
};

static void mlx5e_tc_fib_event_work(struct work_struct *work);
static struct mlx5e_tc_fib_event_data *
mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
{
	struct mlx5e_tc_fib_event_data *fib_work;

	fib_work = kzalloc(sizeof(*fib_work), flags);
	if (WARN_ON(!fib_work))
		return NULL;

	INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
	fib_work->event = event;
	fib_work->ul_dev = ul_dev;

	return fib_work;
}

static int
mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
			   struct mlx5e_route_entry *r,
			   unsigned long event)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_fib_event_data *fib_work;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct net_device *ul_dev;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	ul_dev = uplink_rpriv->netdev;

	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
	if (!fib_work)
		return -ENOMEM;

	dev_hold(ul_dev);
	refcount_inc(&r->refcnt);
	fib_work->r = r;
	queue_work(priv->wq, &fib_work->work);

	return 0;
}

int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
			     struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	unsigned long tbl_time_before, tbl_time_after;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	int err = 0;

	esw_attr = attr->esw_attr;
	parse_attr = attr->parse_attr;
	mutex_lock(&esw->offloads.encap_tbl_lock);
	if (!esw_attr->rx_tun_attr)
		goto out;

	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
	tbl_time_after = tbl_time_before;
	err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr);
	if (err || !esw_attr->rx_tun_attr->decap_vport)
		goto out;

	key.ip_version = attr->ip_version;
	if (key.ip_version == 4)
		key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
	else
		key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;

	r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
				   &tbl_time_after);
	if (IS_ERR(r)) {
		err = PTR_ERR(r);
		goto out;
	}
	/* Routing changed concurrently. FIB event handler might have missed new
	 * entry, schedule update.
	 */
	if (tbl_time_before != tbl_time_after) {
		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
		if (err) {
			mlx5e_route_put_locked(priv, r);
			goto out;
		}
	}

	flow->decap_route = r;
	list_add(&flow->decap_routes, &r->decap_flows);
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	return 0;

out:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	return err;
}

static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
				    struct mlx5e_tc_flow *flow,
				    struct mlx5e_encap_entry *e,
				    bool new_encap_entry,
				    unsigned long tbl_time_before,
				    int out_index)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	unsigned long tbl_time_after = tbl_time_before;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	const struct ip_tunnel_info *tun_info;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	unsigned short family;
	int err = 0;

	esw_attr = attr->esw_attr;
	parse_attr = attr->parse_attr;
	tun_info = parse_attr->tun_info[out_index];
	family = ip_tunnel_info_af(tun_info);

	if (family == AF_INET) {
		key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
		key.ip_version = 4;
	} else if (family == AF_INET6) {
		key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
		key.ip_version = 6;
	}

	err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
				  e->route_dev_ifindex, out_index);
	if (err || !(esw_attr->dests[out_index].flags &
		     MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
		return err;

	r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
				   &tbl_time_after);
	if (IS_ERR(r))
		return PTR_ERR(r);
	/* Routing changed concurrently. FIB event handler might have missed new
	 * entry, schedule update.
	 */
	if (tbl_time_before != tbl_time_after) {
		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
		if (err) {
			mlx5e_route_put_locked(priv, r);
			return err;
		}
	}

	flow->encap_routes[out_index].r = r;
	if (new_encap_entry)
		list_add(&e->route_list, &r->encap_entries);
	flow->encap_routes[out_index].index = out_index;
	return 0;
}

void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
			      struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_route_entry *r = flow->decap_route;

	if (!r)
		return;

	mutex_lock(&esw->offloads.encap_tbl_lock);
	list_del(&flow->decap_routes);
	flow->decap_route = NULL;

	if (!refcount_dec_and_test(&r->refcnt)) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		return;
	}
	hash_del_rcu(&r->hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_route_dealloc(priv, r);
}

static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
				     struct mlx5e_tc_flow *flow,
				     int out_index)
{
	struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_encap_entry *e, *tmp;

	if (!r)
		return;

	mutex_lock(&esw->offloads.encap_tbl_lock);
	flow->encap_routes[out_index].r = NULL;

	if (!refcount_dec_and_test(&r->refcnt)) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		return;
	}
	list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
		list_del_init(&e->route_list);
	hash_del_rcu(&r->hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_route_dealloc(priv, r);
}

static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
				   struct mlx5e_encap_entry *e,
				   struct list_head *encap_flows)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, encap_flows, tmp_list) {
		struct mlx5_flow_attr *attr = flow->attr;
		struct mlx5_esw_flow_attr *esw_attr;

		if (!mlx5e_is_offloaded_flow(flow))
			continue;
		esw_attr = attr->esw_attr;

		if (flow_flag_test(flow, SLOW))
			mlx5e_tc_unoffload_from_slow_path(esw, flow);
		else
			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
		mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr);
		attr->modify_hdr = NULL;

		esw_attr->dests[flow->tmp_entry_index].flags &=
			~MLX5_ESW_DEST_ENCAP_VALID;
		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
	}

	e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
		e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
		mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
		e->pkt_reformat = NULL;
	}
}

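/* Re-offload flows onto an updated route: refresh the encap header for the
 * new egress device, rewrite the VF tunnel source port metadata per flow,
 * then reinstall either the encap rule or the slow path rule depending on
 * whether all encap destinations are valid again.
 */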
static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
				  struct net_device *tunnel_dev,
				  struct mlx5e_encap_entry *e,
				  struct list_head *encap_flows)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;
	int err;

	err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
		mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
		mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
	if (err)
		mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err);
	e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;

	list_for_each_entry(flow, encap_flows, tmp_list) {
		struct mlx5e_tc_flow_parse_attr *parse_attr;
		struct mlx5_flow_attr *attr = flow->attr;
		struct mlx5_esw_flow_attr *esw_attr;
		struct mlx5_flow_handle *rule;
		struct mlx5_flow_spec *spec;

		if (flow_flag_test(flow, FAILED))
			continue;

		esw_attr = attr->esw_attr;
		parse_attr = attr->parse_attr;
		spec = &parse_attr->spec;

		err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
					     e->out_dev, e->route_dev_ifindex,
					     flow->tmp_entry_index);
		if (err) {
			mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err);
			continue;
		}

		err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow);
		if (err) {
			mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
				       err);
			continue;
		}

		if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
			esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
			esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
			if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
				goto offload_to_slow_path;
			/* update from slow path rule to encap rule */
			rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
			if (IS_ERR(rule)) {
				err = PTR_ERR(rule);
				mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
					       err);
			} else {
				flow->rule[0] = rule;
			}
		} else {
offload_to_slow_path:
			rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
			/* mark the flow's encap dest as non-valid */
			esw_attr->dests[flow->tmp_entry_index].flags &=
				~MLX5_ESW_DEST_ENCAP_VALID;

			if (IS_ERR(rule)) {
				err = PTR_ERR(rule);
				mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
					       err);
			} else {
				flow->rule[0] = rule;
			}
		}
		flow_flag_set(flow, OFFLOADED);
	}
}

static int mlx5e_update_route_encaps(struct mlx5e_priv *priv,
				     struct mlx5e_route_entry *r,
				     struct list_head *flow_list,
				     bool replace)
{
	struct net_device *tunnel_dev;
	struct mlx5e_encap_entry *e;

	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
	if (!tunnel_dev)
		return -ENODEV;

	list_for_each_entry(e, &r->encap_entries, route_list) {
		LIST_HEAD(encap_flows);

		mlx5e_take_all_encap_flows(e, &encap_flows);
		if (list_empty(&encap_flows))
			continue;

		if (mlx5e_route_entry_valid(r))
			mlx5e_invalidate_encap(priv, e, &encap_flows);

		if (!replace) {
			list_splice(&encap_flows, flow_list);
			continue;
		}

		mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows);
		list_splice(&encap_flows, flow_list);
	}

	return 0;
}

static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv,
				      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, flow_list, tmp_list)
		if (mlx5e_is_offloaded_flow(flow))
			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
}

static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
				  struct list_head *decap_flows)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, decap_flows, tmp_list) {
		struct mlx5e_tc_flow_parse_attr *parse_attr;
		struct mlx5_flow_attr *attr = flow->attr;
		struct mlx5_flow_handle *rule;
		struct mlx5_flow_spec *spec;
		int err;

		if (flow_flag_test(flow, FAILED))
			continue;

		parse_attr = attr->parse_attr;
		spec = &parse_attr->spec;
		err = mlx5e_tc_tun_route_lookup(priv, spec, attr);
		if (err) {
			mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
				       err);
			continue;
		}

		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
				       err);
		} else {
			flow->rule[0] = rule;
			flow_flag_set(flow, OFFLOADED);
		}
	}
}

static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
					  struct mlx5e_route_entry *r,
					  struct list_head *flow_list,
					  bool replace)
{
	struct net_device *tunnel_dev;
	LIST_HEAD(decap_flows);

	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
	if (!tunnel_dev)
		return -ENODEV;

	mlx5e_take_all_route_decap_flows(r, &decap_flows);
	if (mlx5e_route_entry_valid(r))
		mlx5e_unoffload_flow_list(priv, &decap_flows);
	if (replace)
		mlx5e_reoffload_decap(priv, &decap_flows);

	list_splice(&decap_flows, flow_list);

	return 0;
}

static void mlx5e_tc_fib_event_work(struct work_struct *work)
{
	struct mlx5e_tc_fib_event_data *event_data =
		container_of(work, struct mlx5e_tc_fib_event_data, work);
	struct net_device *ul_dev = event_data->ul_dev;
	struct mlx5e_priv *priv = netdev_priv(ul_dev);
	struct mlx5e_route_entry *r = event_data->r;
	struct mlx5_eswitch *esw;
	LIST_HEAD(flow_list);
	bool replace;
	int err;

	/* sync with concurrent neigh updates */
	rtnl_lock();
	esw = priv->mdev->priv.eswitch;
	mutex_lock(&esw->offloads.encap_tbl_lock);
	replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;

	if (!mlx5e_route_entry_valid(r) && !replace)
		goto out;

	err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
	if (err)
		mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
			       err);

	err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
	if (err)
		mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
			       err);

	if (replace)
		r->flags |= MLX5E_ROUTE_ENTRY_VALID;
out:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	rtnl_unlock();

	mlx5e_put_flow_list(priv, &flow_list);
	mlx5e_route_put(priv, event_data->r);
	dev_put(event_data->ul_dev);
	kfree(event_data);
}

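/* Only exact host routes can describe a tunnel endpoint, so the IPv4
 * handler below ignores prefixes other than /32 (and the IPv6 one ignores
 * prefixes other than /128), as well as routes whose nexthop device is not
 * an mlx5 netdev.
 */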
static struct mlx5e_tc_fib_event_data *
mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
			 struct net_device *ul_dev,
			 struct mlx5e_tc_tun_encap *encap,
			 unsigned long event,
			 struct fib_notifier_info *info)
{
	struct fib_entry_notifier_info *fen_info;
	struct mlx5e_tc_fib_event_data *fib_work;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	struct net_device *fib_dev;

	fen_info = container_of(info, struct fib_entry_notifier_info, info);
	fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
	if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
	    fen_info->dst_len != 32)
		return NULL;

	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
	if (!fib_work)
		return ERR_PTR(-ENOMEM);

	key.endpoint_ip.v4 = htonl(fen_info->dst);
	key.ip_version = 4;

	/* Can't fail after this point because releasing reference to r
	 * requires obtaining sleeping mutex which we can't do in atomic
	 * context.
	 */
	r = mlx5e_route_lookup_for_update(encap, &key);
	if (!r)
		goto out;
	fib_work->r = r;
	dev_hold(ul_dev);

	return fib_work;

out:
	kfree(fib_work);
	return NULL;
}

static struct mlx5e_tc_fib_event_data *
mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
			 struct net_device *ul_dev,
			 struct mlx5e_tc_tun_encap *encap,
			 unsigned long event,
			 struct fib_notifier_info *info)
{
	struct fib6_entry_notifier_info *fen_info;
	struct mlx5e_tc_fib_event_data *fib_work;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	struct net_device *fib_dev;

	fen_info = container_of(info, struct fib6_entry_notifier_info, info);
	fib_dev = fib6_info_nh_dev(fen_info->rt);
	if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
	    fen_info->rt->fib6_dst.plen != 128)
		return NULL;

	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
	if (!fib_work)
		return ERR_PTR(-ENOMEM);

	memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
	       sizeof(fen_info->rt->fib6_dst.addr));
	key.ip_version = 6;

	/* Can't fail after this point because releasing reference to r
	 * requires obtaining sleeping mutex which we can't do in atomic
	 * context.
	 */
	r = mlx5e_route_lookup_for_update(encap, &key);
	if (!r)
		goto out;
	fib_work->r = r;
	dev_hold(ul_dev);

	return fib_work;

out:
	kfree(fib_work);
	return NULL;
}

static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
{
	struct mlx5e_tc_fib_event_data *fib_work;
	struct fib_notifier_info *info = ptr;
	struct mlx5e_tc_tun_encap *encap;
	struct net_device *ul_dev;
	struct mlx5e_priv *priv;

	encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
	priv = encap->priv;
	ul_dev = priv->netdev;
	priv = netdev_priv(ul_dev);

	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		if (info->family == AF_INET)
			fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
		else if (info->family == AF_INET6)
			fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
		else
			return NOTIFY_DONE;

		if (!IS_ERR_OR_NULL(fib_work)) {
			queue_work(priv->wq, &fib_work->work);
		} else if (IS_ERR(fib_work)) {
			NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
			mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
				       PTR_ERR(fib_work));
		}

		break;
	default:
		return NOTIFY_DONE;
	}

	return NOTIFY_DONE;
}

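/* A minimal lifecycle sketch for this module, assuming a caller that owns
 * the uplink representor private data (the field name is illustrative):
 *
 *	uplink_priv->encap = mlx5e_tc_tun_init(priv);
 *	if (IS_ERR(uplink_priv->encap))
 *		return PTR_ERR(uplink_priv->encap);
 *	...
 *	mlx5e_tc_tun_cleanup(uplink_priv->encap);
 */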
struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_tun_encap *encap;
	int err;

	encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
	if (!encap)
		return ERR_PTR(-ENOMEM);

	encap->priv = priv;
	encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
	spin_lock_init(&encap->route_lock);
	hash_init(encap->route_tbl);
	err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
				    NULL, NULL);
	if (err) {
		kvfree(encap);
		return ERR_PTR(err);
	}

	return encap;
}

void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
{
	if (!encap)
		return;

	unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
	flush_workqueue(encap->priv->wq); /* flush fib event works */
	kvfree(encap);
}