// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021 Mellanox Technologies. */

#include <net/fib_notifier.h>
#include <net/nexthop.h>
#include "tc_tun_encap.h"
#include "en_tc.h"
#include "tc_tun.h"
#include "rep/tc.h"
#include "diag/en_tc_tracepoint.h"

/* State flags for mlx5e_route_entry::flags. */
enum {
	MLX5E_ROUTE_ENTRY_VALID = BIT(0),
};

/* If the encap entry's route device is an OVS master (internal port), set up
 * forwarding-to-internal-port actions for the flow attr. Returns 0 if the
 * route device is absent or not an OVS master (nothing to do).
 */
static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv,
				     struct mlx5_flow_attr *attr,
				     struct mlx5e_encap_entry *e,
				     int out_index)
{
	struct net_device *route_dev;
	int err = 0;

	route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex);

	if (!route_dev || !netif_is_ovs_master(route_dev))
		goto out;

	err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex,
						MLX5E_TC_INT_PORT_EGRESS,
						&attr->action, out_index);

out:
	if (route_dev)
		dev_put(route_dev);

	return err;
}

/* Hash key for the route table: tunnel endpoint IP, discriminated by
 * ip_version (4 or 6).
 */
struct mlx5e_route_key {
	int ip_version;
	union {
		__be32 v4;
		struct in6_addr v6;
	} endpoint_ip;
};

/* One tracked route endpoint. Links all encap entries and decap flows that
 * depend on this route so they can be updated together on FIB events.
 */
struct mlx5e_route_entry {
	struct mlx5e_route_key key;
	struct list_head encap_entries;	/* encap entries using this route */
	struct list_head decap_flows;	/* decap flows using this route */
	u32 flags;			/* MLX5E_ROUTE_ENTRY_* */
	struct hlist_node hlist;	/* membership in route_tbl */
	refcount_t refcnt;
	int tunnel_dev_index;
	struct rcu_head rcu;
};

/* Per-uplink tunnel-encap context: FIB notifier block plus the route table
 * it maintains.
 */
struct mlx5e_tc_tun_encap {
	struct mlx5e_priv *priv;
	struct notifier_block fib_nb;
	spinlock_t route_lock; /* protects route_tbl */
	unsigned long route_tbl_last_update;
	DECLARE_HASHTABLE(route_tbl, 8);
};

static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
{
	return r->flags & MLX5E_ROUTE_ENTRY_VALID;
}

/* Allocate rx_tun_attr for the flow and populate it with the outer src/dst
 * IP addresses taken from the match value of @spec. Sets the TUN_RX flow
 * flag (and tun_ip_version) only when both addresses are non-zero.
 */
int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
			     struct mlx5_flow_spec *spec)
{
	struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
	struct mlx5_rx_tun_attr *tun_attr;
	void *daddr, *saddr;
	u8 ip_version;

	tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
	if (!tun_attr)
		return -ENOMEM;

	esw_attr->rx_tun_attr = tun_attr;
	ip_version = mlx5e_tc_get_ip_version(spec, true);

	if (ip_version == 4) {
		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
		tun_attr->dst_ip.v4 = *(__be32 *)daddr;
		tun_attr->src_ip.v4 = *(__be32 *)saddr;
		if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
			return 0;
	}
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
	else if (ip_version == 6) {
		int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
		struct in6_addr zerov6 = {};

		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
		memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
		memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
		if (!memcmp(&tun_attr->dst_ip.v6, &zerov6, sizeof(zerov6)) ||
		    !memcmp(&tun_attr->src_ip.v6, &zerov6, sizeof(zerov6)))
			return 0;
	}
#endif
	/* Only set the flag if both src and dst ip addresses exist. They are
	 * required to establish routing.
	 */
	flow_flag_set(flow, TUN_RX);
	flow->attr->tun_ip_version = ip_version;
	return 0;
}

/* Return true only when every encap destination of the flow has a resolved
 * (valid) neighbour.
 */
static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
{
	bool all_flow_encaps_valid = true;
	int i;

	/* Flow can be associated with multiple encap entries.
	 * Before offloading the flow verify that all of them have
	 * a valid neighbour.
	 */
	for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
			continue;
		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
			all_flow_encaps_valid = false;
			break;
		}
	}

	return all_flow_encaps_valid;
}

/* Neighbour became reachable: allocate the packet-reformat for the cached
 * encap header, mark the entry valid, and move each flow on @flow_list from
 * its slow-path rule to the encap (fast-path) rule where possible.
 */
void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
			      struct mlx5e_encap_entry *e,
			      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_pkt_reformat_params reformat_params;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_attr *attr;
	struct mlx5_flow_spec *spec;
	struct mlx5e_tc_flow *flow;
	int err;

	if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
		return;

	memset(&reformat_params, 0, sizeof(reformat_params));
	reformat_params.type = e->reformat_type;
	reformat_params.size = e->encap_size;
	reformat_params.data = e->encap_header;
	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
						     &reformat_params,
						     MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(e->pkt_reformat)) {
		mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
			       PTR_ERR(e->pkt_reformat));
		return;
	}
	e->flags |= MLX5_ENCAP_ENTRY_VALID;
	mlx5e_rep_queue_neigh_stats_work(priv);

	list_for_each_entry(flow, flow_list, tmp_list) {
		/* only flows currently parked on the slow path are candidates */
		if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW))
			continue;
		attr = flow->attr;
		esw_attr = attr->esw_attr;
		spec = &attr->parse_attr->spec;

		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
		esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;

		/* Do not offload flows with unresolved neighbors */
		if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
			continue;
		/* update from slow path rule to encap rule */
		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
				       err);
			continue;
		}

		mlx5e_tc_unoffload_from_slow_path(esw, flow);
		flow->rule[0] = rule;
		/* was unset when slow path rule removed */
		flow_flag_set(flow, OFFLOADED);
	}
}

/* Neighbour became unreachable: move each flow on @flow_list from its encap
 * rule back to a slow-path rule, clear the entry's valid flag and release
 * its packet-reformat.
 */
void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
			      struct mlx5e_encap_entry *e,
			      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_attr *attr;
	struct mlx5_flow_spec *spec;
	struct mlx5e_tc_flow *flow;
	int err;

	list_for_each_entry(flow, flow_list, tmp_list) {
		/* only flows currently on the fast path need demoting */
		if (!mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, SLOW))
			continue;
		attr = flow->attr;
		esw_attr = attr->esw_attr;
		spec = &attr->parse_attr->spec;

		/* update from encap rule to slow path rule */
		rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
		/* mark the flow's encap dest as non-valid */
		esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;

		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
				       err);
			continue;
		}

		mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
		flow->rule[0] = rule;
		/* was unset when fast path rule removed */
		flow_flag_set(flow, OFFLOADED);
	}

	/* we know that the encap is valid */
	e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
	mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
}

/* Try to take a reference on @flow and, on success, record @index and add it
 * to @flow_list. If the flow is concurrently being deleted, wait until its
 * hardware rules are removed instead of taking it.
 */
static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
				struct list_head *flow_list,
				int index)
{
	if (IS_ERR(mlx5e_flow_get(flow))) {
		/* Flow is being deleted concurrently. Wait for it to be
		 * unoffloaded from hardware, otherwise deleting encap will
		 * fail.
		 */
		wait_for_completion(&flow->del_hw_done);
		return;
	}
	wait_for_completion(&flow->init_done);

	flow->tmp_entry_index = index;
	list_add(&flow->tmp_list, flow_list);
}

/* Takes reference to all flows attached to encap and adds the flows to
 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
 */
void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
{
	struct encap_flow_item *efi;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(efi, &e->flows, list) {
		flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
		mlx5e_take_tmp_flow(flow, flow_list, efi->index);
	}
}

/* Takes reference to all flows attached to route and adds the flows to
 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
 */
static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
					     struct list_head *flow_list)
{
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, &r->decap_flows, decap_routes)
		mlx5e_take_tmp_flow(flow, flow_list, 0);
}

typedef bool (match_cb)(struct mlx5e_encap_entry *);

/* Iterate the nhe's encap list (under RCU) starting after @e (or from the
 * head when @e is NULL) and return the next entry that can be referenced,
 * is fully initialized, and satisfies @match. Releases the reference on the
 * starting entry @e. Returns NULL when the list is exhausted.
 */
static struct mlx5e_encap_entry *
mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
			      struct mlx5e_encap_entry *e,
			      match_cb match)
{
	struct mlx5e_encap_entry *next = NULL;

retry:
	rcu_read_lock();

	/* find encap with non-zero reference counter value */
	for (next = e ?
		     list_next_or_null_rcu(&nhe->encap_list,
					   &e->encap_list,
					   struct mlx5e_encap_entry,
					   encap_list) :
		     list_first_or_null_rcu(&nhe->encap_list,
					    struct mlx5e_encap_entry,
					    encap_list);
	     next;
	     next = list_next_or_null_rcu(&nhe->encap_list,
					  &next->encap_list,
					  struct mlx5e_encap_entry,
					  encap_list))
		if (mlx5e_encap_take(next))
			break;

	rcu_read_unlock();

	/* release starting encap */
	if (e)
		mlx5e_encap_put(netdev_priv(e->out_dev), e);
	if (!next)
		return next;

	/* wait for encap to be fully initialized */
	wait_for_completion(&next->res_ready);
	/* continue searching if encap entry is not in valid state after completion */
	if (!match(next)) {
		e = next;
		goto retry;
	}

	return next;
}

static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
{
	return e->flags & MLX5_ENCAP_ENTRY_VALID;
}

static struct mlx5e_encap_entry *
mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
			   struct mlx5e_encap_entry *e)
{
	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
}

static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
{
	return e->compl_result >= 0;
}

struct mlx5e_encap_entry *
mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
			  struct mlx5e_encap_entry *e)
{
	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
}

/* Check whether any offloaded flow using this neighbour saw traffic since
 * the last report (via flow counters); if so, poke the neigh entry so the
 * kernel keeps it fresh.
 */
void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
	struct mlx5e_encap_entry *e = NULL;
	struct mlx5e_tc_flow *flow;
	struct mlx5_fc *counter;
	struct neigh_table *tbl;
	bool neigh_used = false;
	struct neighbour *n;
	u64 lastuse;

	if (m_neigh->family == AF_INET)
		tbl = &arp_tbl;
#if IS_ENABLED(CONFIG_IPV6)
	else if (m_neigh->family == AF_INET6)
		tbl = ipv6_stub->nd_tbl;
#endif
	else
		return;

	/* mlx5e_get_next_valid_encap() releases previous encap before returning
	 * next one.
	 */
	while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
		struct mlx5e_priv *priv = netdev_priv(e->out_dev);
		struct encap_flow_item *efi, *tmp;
		struct mlx5_eswitch *esw;
		LIST_HEAD(flow_list);

		esw = priv->mdev->priv.eswitch;
		mutex_lock(&esw->offloads.encap_tbl_lock);
		list_for_each_entry_safe(efi, tmp, &e->flows, list) {
			flow = container_of(efi, struct mlx5e_tc_flow,
					    encaps[efi->index]);
			if (IS_ERR(mlx5e_flow_get(flow)))
				continue;
			list_add(&flow->tmp_list, &flow_list);

			if (mlx5e_is_offloaded_flow(flow)) {
				counter = mlx5e_tc_get_counter(flow);
				lastuse = mlx5_fc_query_lastuse(counter);
				if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
					neigh_used = true;
					break;
				}
			}
		}
		mutex_unlock(&esw->offloads.encap_tbl_lock);

		mlx5e_put_flow_list(priv, &flow_list);
		if (neigh_used) {
			/* release current encap before breaking the loop */
			mlx5e_encap_put(priv, e);
			break;
		}
	}

	trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);

	if (neigh_used) {
		nhe->reported_lastuse = jiffies;

		/* find the relevant neigh according to the cached device and
		 * dst ip pair
		 */
		n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
		if (!n)
			return;

		neigh_event_send(n, NULL);
		neigh_release(n);
	}
}

/* Final teardown of an encap entry once its refcount dropped to zero:
 * detach from the rep, free the hardware reformat if it was valid, then
 * free the entry after a grace period.
 */
static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
	WARN_ON(!list_empty(&e->flows));

	/* compl_result > 0 means the entry completed initialization */
	if (e->compl_result > 0) {
		mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);

		if (e->flags & MLX5_ENCAP_ENTRY_VALID)
			mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
	}

	kfree(e->tun_info);
	kfree(e->encap_header);
	kfree_rcu(e, rcu);
}

/* Final teardown of a decap entry; frees the reformat only when the entry's
 * initialization succeeded (compl_result == 0).
 */
static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
				struct mlx5e_decap_entry *d)
{
	WARN_ON(!list_empty(&d->flows));

	if (!d->compl_result)
		mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);

	kfree_rcu(d, rcu);
}

/* Drop a reference on @e; on the last reference, unlink it from the encap
 * table (under encap_tbl_lock) and free it.
 */
void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
		return;
	list_del(&e->route_list);
	hash_del_rcu(&e->encap_hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_encap_dealloc(priv, e);
}

/* Drop a reference on @d; on the last reference, unlink it from the decap
 * table (under decap_tbl_lock) and free it.
 */
static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
		return;
	hash_del_rcu(&d->hlist);
	mutex_unlock(&esw->offloads.decap_tbl_lock);

	mlx5e_decap_dealloc(priv, d);
}

static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
				     struct mlx5e_tc_flow *flow,
				     int out_index);

/* Detach the flow's encap at @out_index: drop the route attachment if one
 * was made, unlink the flow from the encap entry, and free the entry on the
 * last reference.
 */
void mlx5e_detach_encap(struct mlx5e_priv *priv,
			struct mlx5e_tc_flow *flow, int out_index)
{
	struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (flow->attr->esw_attr->dests[out_index].flags &
	    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
		mlx5e_detach_encap_route(priv, flow, out_index);

	/* flow wasn't fully initialized */
	if (!e)
		return;

	mutex_lock(&esw->offloads.encap_tbl_lock);
	list_del(&flow->encaps[out_index].list);
	flow->encaps[out_index].e = NULL;
	if (!refcount_dec_and_test(&e->refcnt)) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		return;
	}
	list_del(&e->route_list);
	hash_del_rcu(&e->encap_hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_encap_dealloc(priv, e);
}

/* Detach the flow's decap reformat entry, freeing it on last reference. */
void mlx5e_detach_decap(struct mlx5e_priv *priv,
			struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_decap_entry *d = flow->decap_reformat;

	if (!d)
		return;

	mutex_lock(&esw->offloads.decap_tbl_lock);
	list_del(&flow->l3_to_l2_reformat);
	flow->decap_reformat = NULL;

	if (!refcount_dec_and_test(&d->refcnt)) {
		mutex_unlock(&esw->offloads.decap_tbl_lock);
		return;
	}
	hash_del_rcu(&d->hlist);
	mutex_unlock(&esw->offloads.decap_tbl_lock);

	mlx5e_decap_dealloc(priv, d);
}

/* Default equality check for encap keys: same tunnel key and tunnel type. */
bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
					   struct mlx5e_encap_key *b)
{
	return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 &&
	       a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
}

static int cmp_decap_info(struct mlx5e_decap_key *a,
			  struct mlx5e_decap_key *b)
{
	return memcmp(&a->key, &b->key, sizeof(b->key));
}

static int hash_encap_info(struct mlx5e_encap_key *key)
{
	return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
		     key->tc_tunnel->tunnel_type);
}

static int hash_decap_info(struct mlx5e_decap_key *key)
{
	return jhash(&key->key, sizeof(key->key), 0);
}

/* Take a reference on @e unless its refcount already hit zero (entry is
 * being freed). Returns false in that case.
 */
bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
{
	return refcount_inc_not_zero(&e->refcnt);
}

static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
{
	return refcount_inc_not_zero(&e->refcnt);
}

/* Look up a referenced encap entry matching @key in the eswitch encap table.
 * Returns NULL when no live matching entry exists.
 */
static struct mlx5e_encap_entry *
mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key,
		uintptr_t hash_key)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_encap_key e_key;
	struct mlx5e_encap_entry *e;

	hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
				   encap_hlist, hash_key) {
		e_key.ip_tun_key = &e->tun_info->key;
		e_key.tc_tunnel = e->tunnel;
		if (e->tunnel->encap_info_equal(&e_key, key) &&
		    mlx5e_encap_take(e))
			return e;
	}

	return NULL;
}

/* Look up a referenced decap entry matching @key in the eswitch decap table. */
static struct mlx5e_decap_entry *
mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
		uintptr_t hash_key)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_decap_key r_key;
	struct mlx5e_decap_entry *e;

	hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
				   hlist, hash_key) {
		r_key = e->key;
		if (!cmp_decap_info(&r_key, key) &&
		    mlx5e_decap_take(e))
			return e;
	}
	return NULL;
}

/* Duplicate @tun_info including its trailing options; caller owns the copy. */
struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
{
	size_t tun_size = sizeof(*tun_info) + tun_info->options_len;

	return kmemdup(tun_info, tun_size, GFP_KERNEL);
}

/* Reject attaching the same encap entry twice to one flow (checks the
 * already-filled slots below @out_index).
 */
static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
				      struct mlx5e_tc_flow *flow,
				      int out_index,
				      struct mlx5e_encap_entry *e,
				      struct netlink_ext_ack *extack)
{
	int i;

	for (i = 0; i < out_index; i++) {
		if (flow->encaps[i].e != e)
			continue;
		NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
		netdev_err(priv->netdev, "can't duplicate encap action\n");
		return true;
	}

	return false;
}

/* If the tunnel route device is a local mlx5 VF, add a source-port-rewrite
 * mod-header action carrying the VF's vport metadata and mark the dest with
 * MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE. No-op (returns 0) when the
 * route device is not an mlx5 VF tunnel.
 */
static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
			       struct mlx5_flow_attr *attr,
			       struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
			       struct net_device *out_dev,
			       int route_dev_ifindex,
			       int out_index)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
	struct net_device *route_dev;
	u16 vport_num;
	int err = 0;
	u32 data;

	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);

	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
		goto out;

	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
	if (err)
		goto out;

	attr->dest_chain = 0;
	attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
	esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
	data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
						       vport_num);
	err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
						   MLX5_FLOW_NAMESPACE_FDB,
						   VPORT_TO_REG, data);
	if (err >= 0) {
		/* positive return is the allocated mod-header action id */
		esw_attr->dests[out_index].src_port_rewrite_act_id = err;
		err = 0;
	}

out:
	if (route_dev)
		dev_put(route_dev);
	return err;
}

/* Refresh an existing source-port-rewrite action in place with the vport
 * metadata of the (possibly changed) route device. Unlike
 * mlx5e_set_vf_tunnel(), a non-VF route device is an error here.
 */
static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
				  struct mlx5_esw_flow_attr *attr,
				  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
				  struct net_device *out_dev,
				  int route_dev_ifindex,
				  int out_index)
{
	int act_id = attr->dests[out_index].src_port_rewrite_act_id;
	struct net_device *route_dev;
	u16 vport_num;
	int err = 0;
	u32 data;

	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);

	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
		err = -ENODEV;
		goto out;
	}

	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
	if (err)
		goto out;

	data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
						       vport_num);
	mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);

out:
	if (route_dev)
		dev_put(route_dev);
	return err;
}

/* Snapshot the route table's last-update timestamp (used to detect
 * concurrent FIB changes while attaching encap/decap routes).
 */
static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct mlx5e_tc_tun_encap *encap;
	unsigned int ret;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;
	encap = uplink_priv->encap;

	spin_lock_bh(&encap->route_lock);
	ret = encap->route_tbl_last_update;
	spin_unlock_bh(&encap->route_lock);
	return ret;
}

static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
				    struct mlx5e_tc_flow *flow,
				    struct mlx5e_encap_entry *e,
				    bool new_encap_entry,
				    unsigned long tbl_time_before,
				    int out_index);

/* Attach an encap entry to @flow for destination @out_index: reuse a cached
 * entry matching the tunnel key or create and initialize a new one, attach
 * the route, and report through @encap_dev/@encap_valid whether the entry's
 * neighbour is currently resolved.
 */
int mlx5e_attach_encap(struct mlx5e_priv *priv,
		       struct mlx5e_tc_flow *flow,
		       struct net_device *mirred_dev,
		       int out_index,
		       struct netlink_ext_ack *extack,
		       struct net_device **encap_dev,
		       bool *encap_valid)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	const struct ip_tunnel_info *tun_info;
	const struct mlx5e_mpls_info *mpls_info;
	unsigned long tbl_time_before = 0;
	struct mlx5e_encap_entry *e;
	struct mlx5e_encap_key key;
	bool entry_created = false;
	unsigned short family;
	uintptr_t hash_key;
	int err = 0;

	parse_attr = attr->parse_attr;
	tun_info = parse_attr->tun_info[out_index];
	mpls_info = &parse_attr->mpls_info[out_index];
	family = ip_tunnel_info_af(tun_info);
	key.ip_tun_key = &tun_info->key;
	key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
	if (!key.tc_tunnel) {
		NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
		return -EOPNOTSUPP;
	}

	hash_key = hash_encap_info(&key);

	mutex_lock(&esw->offloads.encap_tbl_lock);
	e = mlx5e_encap_get(priv, &key, hash_key);

	/* must verify if encap is valid or not */
	if (e) {
		/* Check that entry was not already attached to this flow */
		if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
			err = -EOPNOTSUPP;
			goto out_err;
		}

		/* drop the lock while waiting for the entry's initializer */
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		wait_for_completion(&e->res_ready);

		/* Protect against concurrent neigh update. */
		mutex_lock(&esw->offloads.encap_tbl_lock);
		if (e->compl_result < 0) {
			err = -EREMOTEIO;
			goto out_err;
		}
		goto attach_flow;
	}

	e = kzalloc(sizeof(*e), GFP_KERNEL);
	if (!e) {
		err = -ENOMEM;
		goto out_err;
	}

	refcount_set(&e->refcnt, 1);
	init_completion(&e->res_ready);
	entry_created = true;
	INIT_LIST_HEAD(&e->route_list);

	tun_info = mlx5e_dup_tun_info(tun_info);
	if (!tun_info) {
		err = -ENOMEM;
		goto out_err_init;
	}
	e->tun_info = tun_info;
	memcpy(&e->mpls_info, mpls_info, sizeof(*mpls_info));
	err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
	if (err)
		goto out_err_init;

	INIT_LIST_HEAD(&e->flows);
	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	/* header creation may sleep (neigh resolution) — done unlocked */
	if (family == AF_INET)
		err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
	else if (family == AF_INET6)
		err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);

	/* Protect against concurrent neigh update. */
	mutex_lock(&esw->offloads.encap_tbl_lock);
	complete_all(&e->res_ready);
	if (err) {
		e->compl_result = err;
		goto out_err;
	}
	e->compl_result = 1;

attach_flow:
	err = mlx5e_attach_encap_route(priv, flow, e, entry_created, tbl_time_before,
				       out_index);
	if (err)
		goto out_err;

	err = mlx5e_set_int_port_tunnel(priv, attr, e, out_index);
	if (err == -EOPNOTSUPP) {
		/* If device doesn't support int port offload,
		 * redirect to uplink vport.
		 */
		mlx5_core_dbg(priv->mdev, "attaching int port as encap dev not supported, using uplink\n");
		err = 0;
	} else if (err) {
		goto out_err;
	}

	flow->encaps[out_index].e = e;
	list_add(&flow->encaps[out_index].list, &e->flows);
	flow->encaps[out_index].index = out_index;
	*encap_dev = e->out_dev;
	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
		attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
		attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
		*encap_valid = true;
	} else {
		*encap_valid = false;
	}
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	return err;

out_err:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	if (e)
		mlx5e_encap_put(priv, e);
	return err;

out_err_init:
	/* entry not yet published in the hash table — free it directly */
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	kfree(tun_info);
	kfree(e);
	return err;
}

/* Attach a decap (L3-tunnel-to-L2) reformat entry to @flow, reusing a
 * cached entry with the same ethernet header or creating a new one.
 */
int mlx5e_attach_decap(struct mlx5e_priv *priv,
		       struct mlx5e_tc_flow *flow,
		       struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
	struct mlx5_pkt_reformat_params reformat_params;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5e_decap_entry *d;
	struct mlx5e_decap_key key;
	uintptr_t hash_key;
	int err = 0;

	parse_attr = flow->attr->parse_attr;
	if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "encap header larger than max supported");
		return -EOPNOTSUPP;
	}

	key.key = parse_attr->eth;
	hash_key = hash_decap_info(&key);
	mutex_lock(&esw->offloads.decap_tbl_lock);
	d = mlx5e_decap_get(priv, &key, hash_key);
	if (d) {
		/* drop the lock while waiting for the entry's initializer */
		mutex_unlock(&esw->offloads.decap_tbl_lock);
		wait_for_completion(&d->res_ready);
		mutex_lock(&esw->offloads.decap_tbl_lock);
		if (d->compl_result) {
			err = -EREMOTEIO;
			goto out_free;
		}
		goto found;
	}

	d = kzalloc(sizeof(*d), GFP_KERNEL);
	if (!d) {
		err = -ENOMEM;
		goto out_err;
	}

	d->key = key;
	refcount_set(&d->refcnt, 1);
	init_completion(&d->res_ready);
	INIT_LIST_HEAD(&d->flows);
	hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
	mutex_unlock(&esw->offloads.decap_tbl_lock);

	memset(&reformat_params, 0, sizeof(reformat_params));
	reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
	reformat_params.size = sizeof(parse_attr->eth);
	reformat_params.data = &parse_attr->eth;
	d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
						     &reformat_params,
						     MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(d->pkt_reformat)) {
		err = PTR_ERR(d->pkt_reformat);
		d->compl_result = err;
	}
	mutex_lock(&esw->offloads.decap_tbl_lock);
	complete_all(&d->res_ready);
	if (err)
		goto out_free;

found:
	flow->decap_reformat = d;
	attr->decap_pkt_reformat = d->pkt_reformat;
	list_add(&flow->l3_to_l2_reformat, &d->flows);
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	return 0;

out_free:
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	mlx5e_decap_put(priv, d);
	return err;

out_err:
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	return err;
}

/* Compare route keys; returns non-zero for differing keys or mixed IP
 * versions (memcmp-style).
 */
static int cmp_route_info(struct mlx5e_route_key *a,
			  struct mlx5e_route_key *b)
{
	if (a->ip_version == 4 && b->ip_version == 4)
		return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
			      sizeof(a->endpoint_ip.v4));
	else if (a->ip_version == 6 && b->ip_version == 6)
		return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
			      sizeof(a->endpoint_ip.v6));
	return 1;
}

static u32 hash_route_info(struct mlx5e_route_key *key)
{
	if (key->ip_version == 4)
		return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
	return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
}

/* Free a route entry after a grace period; it must already be unlinked and
 * have no attached flows or encap entries.
 */
static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
				struct mlx5e_route_entry *r)
{
	WARN_ON(!list_empty(&r->decap_flows));
	WARN_ON(!list_empty(&r->encap_entries));

	kfree_rcu(r, rcu);
}

/* Drop a reference on @r; on the last reference, unlink it from the route
 * table (taking encap_tbl_lock) and free it.
 */
static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
		return;

	hash_del_rcu(&r->hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_route_dealloc(priv, r);
}

/* As mlx5e_route_put(), but for callers that already hold encap_tbl_lock. */
static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	lockdep_assert_held(&esw->offloads.encap_tbl_lock);

	if (!refcount_dec_and_test(&r->refcnt))
		return;
	hash_del_rcu(&r->hlist);
	mlx5e_route_dealloc(priv, r);
}

/* Look up a referenced route entry matching @key. Caller must hold
 * encap->route_lock.
 */
static struct mlx5e_route_entry *
mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
		u32 hash_key)
{
	struct mlx5e_route_key r_key;
	struct mlx5e_route_entry *r;

	hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
		r_key = r->key;
		if (!cmp_route_info(&r_key, key) &&
		    refcount_inc_not_zero(&r->refcnt))
			return r;
	}
	return NULL;
}

/* Find or create a route entry for @key. On creation, also snapshot the
 * route table's last-update time into @route_tbl_change_time (taken under
 * the same lock as the table insert, so callers can detect concurrent FIB
 * updates). Returns ERR_PTR on failure or if an existing entry is invalid.
 */
static struct mlx5e_route_entry *
mlx5e_route_get_create(struct mlx5e_priv *priv,
		       struct mlx5e_route_key *key,
		       int tunnel_dev_index,
		       unsigned long *route_tbl_change_time)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct mlx5e_tc_tun_encap *encap;
	struct mlx5e_route_entry *r;
	u32 hash_key;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;
	encap = uplink_priv->encap;

	hash_key = hash_route_info(key);
	spin_lock_bh(&encap->route_lock);
	r = mlx5e_route_get(encap, key, hash_key);
	spin_unlock_bh(&encap->route_lock);
	if (r) {
		if (!mlx5e_route_entry_valid(r)) {
			mlx5e_route_put_locked(priv, r);
			return ERR_PTR(-EINVAL);
		}
		return r;
	}

	r = kzalloc(sizeof(*r), GFP_KERNEL);
	if (!r)
		return ERR_PTR(-ENOMEM);

	r->key = *key;
	r->flags |= MLX5E_ROUTE_ENTRY_VALID;
	r->tunnel_dev_index = tunnel_dev_index;
	refcount_set(&r->refcnt, 1);
	INIT_LIST_HEAD(&r->decap_flows);
	INIT_LIST_HEAD(&r->encap_entries);

	spin_lock_bh(&encap->route_lock);
	*route_tbl_change_time = encap->route_tbl_last_update;
	hash_add(encap->route_tbl, &r->hlist, hash_key);
	spin_unlock_bh(&encap->route_lock);

	return r;
}

/* FIB-event-side lookup: bumps route_tbl_last_update so concurrent attach
 * paths notice the table changed, then returns a referenced matching entry
 * (or NULL).
 */
static struct mlx5e_route_entry *
mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
{
	u32 hash_key = hash_route_info(key);
	struct mlx5e_route_entry *r;

	spin_lock_bh(&encap->route_lock);
	encap->route_tbl_last_update = jiffies;
	r = mlx5e_route_get(encap, key, hash_key);
	spin_unlock_bh(&encap->route_lock);

	return r;
}

/* Deferred-work payload for handling a FIB event on a route entry. */
struct mlx5e_tc_fib_event_data {
	struct work_struct work;
	unsigned long event;
	struct mlx5e_route_entry *r;
	struct net_device *ul_dev;
};

static void mlx5e_tc_fib_event_work(struct work_struct *work);
static struct mlx5e_tc_fib_event_data *
mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
{
	struct mlx5e_tc_fib_event_data *fib_work;

	fib_work = kzalloc(sizeof(*fib_work), flags);
	if (WARN_ON(!fib_work))
		return NULL;

	INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
	fib_work->event = event;
	fib_work->ul_dev = ul_dev;

	return fib_work;
}

/* Queue deferred FIB-event processing for @r on the driver workqueue; takes
 * references on the route entry and the uplink netdev for the work item.
 */
static int
mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
			   struct mlx5e_route_entry *r,
			   unsigned long event)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_fib_event_data *fib_work;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct net_device *ul_dev;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	ul_dev = uplink_rpriv->netdev;

	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
	if (!fib_work)
		return -ENOMEM;

	dev_hold(ul_dev);
	refcount_inc(&r->refcnt);
	fib_work->r = r;
	queue_work(priv->wq, &fib_work->work);

	return 0;
}

/* Attach the decap flow to a route entry for its tunnel endpoint so FIB
 * changes can re-drive the offload. No-op for flows without rx_tun_attr or
 * a resolved decap vport.
 */
int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
			     struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	unsigned long tbl_time_before, tbl_time_after;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	int err = 0;

	esw_attr = attr->esw_attr;
	parse_attr = attr->parse_attr;
	mutex_lock(&esw->offloads.encap_tbl_lock);
	if (!esw_attr->rx_tun_attr)
		goto out;

	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
	tbl_time_after = tbl_time_before;
	err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr, parse_attr->filter_dev);
	if (err || !esw_attr->rx_tun_attr->decap_vport)
		goto out;

	key.ip_version = attr->tun_ip_version;
	if (key.ip_version == 4)
		key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
	else
		key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;

	r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
				   &tbl_time_after);
	if (IS_ERR(r)) {
		err = PTR_ERR(r);
		goto out;
	}
	/* Routing changed concurrently. FIB event handler might have missed new
	 * entry, schedule update.
	 */
	if (tbl_time_before != tbl_time_after) {
		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
		if (err) {
			mlx5e_route_put_locked(priv, r);
			goto out;
		}
	}

	flow->decap_route = r;
	list_add(&flow->decap_routes, &r->decap_flows);
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	return 0;

out:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	return err;
}

/* Attach the encap entry's flow to a route entry keyed by the tunnel source
 * address; applies only when the destination needs a source-port rewrite
 * (VF tunnel). Called with encap_tbl_lock held by mlx5e_attach_encap().
 */
static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
				    struct mlx5e_tc_flow *flow,
				    struct mlx5e_encap_entry *e,
				    bool new_encap_entry,
				    unsigned long tbl_time_before,
				    int out_index)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	unsigned long tbl_time_after = tbl_time_before;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	const struct ip_tunnel_info *tun_info;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	unsigned short family;
	int err = 0;

	esw_attr = attr->esw_attr;
	parse_attr = attr->parse_attr;
	tun_info = parse_attr->tun_info[out_index];
	family = ip_tunnel_info_af(tun_info);

	if (family == AF_INET) {
		key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
		key.ip_version = 4;
	} else if (family == AF_INET6) {
		key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
		key.ip_version = 6;
	}

	err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
				  e->route_dev_ifindex, out_index);
	if (err || !(esw_attr->dests[out_index].flags &
		     MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
		return err;

	r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
				   &tbl_time_after);
	if (IS_ERR(r))
		return PTR_ERR(r);
	/* Routing changed concurrently.
FIB event handler might have missed new 1244 * entry, schedule update. 1245 */ 1246 if (tbl_time_before != tbl_time_after) { 1247 err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE); 1248 if (err) { 1249 mlx5e_route_put_locked(priv, r); 1250 return err; 1251 } 1252 } 1253 1254 flow->encap_routes[out_index].r = r; 1255 if (new_encap_entry) 1256 list_add(&e->route_list, &r->encap_entries); 1257 flow->encap_routes[out_index].index = out_index; 1258 return 0; 1259 } 1260 1261 void mlx5e_detach_decap_route(struct mlx5e_priv *priv, 1262 struct mlx5e_tc_flow *flow) 1263 { 1264 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 1265 struct mlx5e_route_entry *r = flow->decap_route; 1266 1267 if (!r) 1268 return; 1269 1270 mutex_lock(&esw->offloads.encap_tbl_lock); 1271 list_del(&flow->decap_routes); 1272 flow->decap_route = NULL; 1273 1274 if (!refcount_dec_and_test(&r->refcnt)) { 1275 mutex_unlock(&esw->offloads.encap_tbl_lock); 1276 return; 1277 } 1278 hash_del_rcu(&r->hlist); 1279 mutex_unlock(&esw->offloads.encap_tbl_lock); 1280 1281 mlx5e_route_dealloc(priv, r); 1282 } 1283 1284 static void mlx5e_detach_encap_route(struct mlx5e_priv *priv, 1285 struct mlx5e_tc_flow *flow, 1286 int out_index) 1287 { 1288 struct mlx5e_route_entry *r = flow->encap_routes[out_index].r; 1289 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 1290 struct mlx5e_encap_entry *e, *tmp; 1291 1292 if (!r) 1293 return; 1294 1295 mutex_lock(&esw->offloads.encap_tbl_lock); 1296 flow->encap_routes[out_index].r = NULL; 1297 1298 if (!refcount_dec_and_test(&r->refcnt)) { 1299 mutex_unlock(&esw->offloads.encap_tbl_lock); 1300 return; 1301 } 1302 list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list) 1303 list_del_init(&e->route_list); 1304 hash_del_rcu(&r->hlist); 1305 mutex_unlock(&esw->offloads.encap_tbl_lock); 1306 1307 mlx5e_route_dealloc(priv, r); 1308 } 1309 1310 static void mlx5e_invalidate_encap(struct mlx5e_priv *priv, 1311 struct mlx5e_encap_entry *e, 1312 struct 
list_head *encap_flows) 1313 { 1314 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 1315 struct mlx5e_tc_flow *flow; 1316 1317 list_for_each_entry(flow, encap_flows, tmp_list) { 1318 struct mlx5_flow_attr *attr = flow->attr; 1319 struct mlx5_esw_flow_attr *esw_attr; 1320 1321 if (!mlx5e_is_offloaded_flow(flow)) 1322 continue; 1323 esw_attr = attr->esw_attr; 1324 1325 if (flow_flag_test(flow, SLOW)) 1326 mlx5e_tc_unoffload_from_slow_path(esw, flow); 1327 else 1328 mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr); 1329 mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr); 1330 attr->modify_hdr = NULL; 1331 1332 esw_attr->dests[flow->tmp_entry_index].flags &= 1333 ~MLX5_ESW_DEST_ENCAP_VALID; 1334 esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL; 1335 } 1336 1337 e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE; 1338 if (e->flags & MLX5_ENCAP_ENTRY_VALID) { 1339 e->flags &= ~MLX5_ENCAP_ENTRY_VALID; 1340 mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat); 1341 e->pkt_reformat = NULL; 1342 } 1343 } 1344 1345 static void mlx5e_reoffload_encap(struct mlx5e_priv *priv, 1346 struct net_device *tunnel_dev, 1347 struct mlx5e_encap_entry *e, 1348 struct list_head *encap_flows) 1349 { 1350 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 1351 struct mlx5e_tc_flow *flow; 1352 int err; 1353 1354 err = ip_tunnel_info_af(e->tun_info) == AF_INET ? 
1355 mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) : 1356 mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e); 1357 if (err) 1358 mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err); 1359 e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE; 1360 1361 list_for_each_entry(flow, encap_flows, tmp_list) { 1362 struct mlx5e_tc_flow_parse_attr *parse_attr; 1363 struct mlx5_flow_attr *attr = flow->attr; 1364 struct mlx5_esw_flow_attr *esw_attr; 1365 struct mlx5_flow_handle *rule; 1366 struct mlx5_flow_spec *spec; 1367 1368 if (flow_flag_test(flow, FAILED)) 1369 continue; 1370 1371 esw_attr = attr->esw_attr; 1372 parse_attr = attr->parse_attr; 1373 spec = &parse_attr->spec; 1374 1375 err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts, 1376 e->out_dev, e->route_dev_ifindex, 1377 flow->tmp_entry_index); 1378 if (err) { 1379 mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err); 1380 continue; 1381 } 1382 1383 err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow); 1384 if (err) { 1385 mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d", 1386 err); 1387 continue; 1388 } 1389 1390 if (e->flags & MLX5_ENCAP_ENTRY_VALID) { 1391 esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat; 1392 esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; 1393 if (!mlx5e_tc_flow_all_encaps_valid(esw_attr)) 1394 goto offload_to_slow_path; 1395 /* update from slow path rule to encap rule */ 1396 rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr); 1397 if (IS_ERR(rule)) { 1398 err = PTR_ERR(rule); 1399 mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", 1400 err); 1401 } else { 1402 flow->rule[0] = rule; 1403 } 1404 } else { 1405 offload_to_slow_path: 1406 rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec); 1407 /* mark the flow's encap dest as non-valid */ 1408 esw_attr->dests[flow->tmp_entry_index].flags &= 1409 ~MLX5_ESW_DEST_ENCAP_VALID; 1410 1411 if 
(IS_ERR(rule)) { 1412 err = PTR_ERR(rule); 1413 mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n", 1414 err); 1415 } else { 1416 flow->rule[0] = rule; 1417 } 1418 } 1419 flow_flag_set(flow, OFFLOADED); 1420 } 1421 } 1422 1423 static int mlx5e_update_route_encaps(struct mlx5e_priv *priv, 1424 struct mlx5e_route_entry *r, 1425 struct list_head *flow_list, 1426 bool replace) 1427 { 1428 struct net_device *tunnel_dev; 1429 struct mlx5e_encap_entry *e; 1430 1431 tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index); 1432 if (!tunnel_dev) 1433 return -ENODEV; 1434 1435 list_for_each_entry(e, &r->encap_entries, route_list) { 1436 LIST_HEAD(encap_flows); 1437 1438 mlx5e_take_all_encap_flows(e, &encap_flows); 1439 if (list_empty(&encap_flows)) 1440 continue; 1441 1442 if (mlx5e_route_entry_valid(r)) 1443 mlx5e_invalidate_encap(priv, e, &encap_flows); 1444 1445 if (!replace) { 1446 list_splice(&encap_flows, flow_list); 1447 continue; 1448 } 1449 1450 mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows); 1451 list_splice(&encap_flows, flow_list); 1452 } 1453 1454 return 0; 1455 } 1456 1457 static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv, 1458 struct list_head *flow_list) 1459 { 1460 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 1461 struct mlx5e_tc_flow *flow; 1462 1463 list_for_each_entry(flow, flow_list, tmp_list) 1464 if (mlx5e_is_offloaded_flow(flow)) 1465 mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr); 1466 } 1467 1468 static void mlx5e_reoffload_decap(struct mlx5e_priv *priv, 1469 struct list_head *decap_flows) 1470 { 1471 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 1472 struct mlx5e_tc_flow *flow; 1473 1474 list_for_each_entry(flow, decap_flows, tmp_list) { 1475 struct mlx5e_tc_flow_parse_attr *parse_attr; 1476 struct mlx5_flow_attr *attr = flow->attr; 1477 struct mlx5_flow_handle *rule; 1478 struct mlx5_flow_spec *spec; 1479 int err; 1480 1481 if (flow_flag_test(flow, FAILED)) 1482 
continue; 1483 1484 parse_attr = attr->parse_attr; 1485 spec = &parse_attr->spec; 1486 err = mlx5e_tc_tun_route_lookup(priv, spec, attr, parse_attr->filter_dev); 1487 if (err) { 1488 mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n", 1489 err); 1490 continue; 1491 } 1492 1493 rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr); 1494 if (IS_ERR(rule)) { 1495 err = PTR_ERR(rule); 1496 mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n", 1497 err); 1498 } else { 1499 flow->rule[0] = rule; 1500 flow_flag_set(flow, OFFLOADED); 1501 } 1502 } 1503 } 1504 1505 static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv, 1506 struct mlx5e_route_entry *r, 1507 struct list_head *flow_list, 1508 bool replace) 1509 { 1510 struct net_device *tunnel_dev; 1511 LIST_HEAD(decap_flows); 1512 1513 tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index); 1514 if (!tunnel_dev) 1515 return -ENODEV; 1516 1517 mlx5e_take_all_route_decap_flows(r, &decap_flows); 1518 if (mlx5e_route_entry_valid(r)) 1519 mlx5e_unoffload_flow_list(priv, &decap_flows); 1520 if (replace) 1521 mlx5e_reoffload_decap(priv, &decap_flows); 1522 1523 list_splice(&decap_flows, flow_list); 1524 1525 return 0; 1526 } 1527 1528 static void mlx5e_tc_fib_event_work(struct work_struct *work) 1529 { 1530 struct mlx5e_tc_fib_event_data *event_data = 1531 container_of(work, struct mlx5e_tc_fib_event_data, work); 1532 struct net_device *ul_dev = event_data->ul_dev; 1533 struct mlx5e_priv *priv = netdev_priv(ul_dev); 1534 struct mlx5e_route_entry *r = event_data->r; 1535 struct mlx5_eswitch *esw; 1536 LIST_HEAD(flow_list); 1537 bool replace; 1538 int err; 1539 1540 /* sync with concurrent neigh updates */ 1541 rtnl_lock(); 1542 esw = priv->mdev->priv.eswitch; 1543 mutex_lock(&esw->offloads.encap_tbl_lock); 1544 replace = event_data->event == FIB_EVENT_ENTRY_REPLACE; 1545 1546 if (!mlx5e_route_entry_valid(r) && !replace) 1547 goto out; 1548 1549 err = 
mlx5e_update_route_encaps(priv, r, &flow_list, replace); 1550 if (err) 1551 mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n", 1552 err); 1553 1554 err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace); 1555 if (err) 1556 mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n", 1557 err); 1558 1559 if (replace) 1560 r->flags |= MLX5E_ROUTE_ENTRY_VALID; 1561 out: 1562 mutex_unlock(&esw->offloads.encap_tbl_lock); 1563 rtnl_unlock(); 1564 1565 mlx5e_put_flow_list(priv, &flow_list); 1566 mlx5e_route_put(priv, event_data->r); 1567 dev_put(event_data->ul_dev); 1568 kfree(event_data); 1569 } 1570 1571 static struct mlx5e_tc_fib_event_data * 1572 mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv, 1573 struct net_device *ul_dev, 1574 struct mlx5e_tc_tun_encap *encap, 1575 unsigned long event, 1576 struct fib_notifier_info *info) 1577 { 1578 struct fib_entry_notifier_info *fen_info; 1579 struct mlx5e_tc_fib_event_data *fib_work; 1580 struct mlx5e_route_entry *r; 1581 struct mlx5e_route_key key; 1582 struct net_device *fib_dev; 1583 1584 fen_info = container_of(info, struct fib_entry_notifier_info, info); 1585 if (fen_info->fi->nh) 1586 return NULL; 1587 fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev; 1588 if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops || 1589 fen_info->dst_len != 32) 1590 return NULL; 1591 1592 fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC); 1593 if (!fib_work) 1594 return ERR_PTR(-ENOMEM); 1595 1596 key.endpoint_ip.v4 = htonl(fen_info->dst); 1597 key.ip_version = 4; 1598 1599 /* Can't fail after this point because releasing reference to r 1600 * requires obtaining sleeping mutex which we can't do in atomic 1601 * context. 
1602 */ 1603 r = mlx5e_route_lookup_for_update(encap, &key); 1604 if (!r) 1605 goto out; 1606 fib_work->r = r; 1607 dev_hold(ul_dev); 1608 1609 return fib_work; 1610 1611 out: 1612 kfree(fib_work); 1613 return NULL; 1614 } 1615 1616 static struct mlx5e_tc_fib_event_data * 1617 mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv, 1618 struct net_device *ul_dev, 1619 struct mlx5e_tc_tun_encap *encap, 1620 unsigned long event, 1621 struct fib_notifier_info *info) 1622 { 1623 struct fib6_entry_notifier_info *fen_info; 1624 struct mlx5e_tc_fib_event_data *fib_work; 1625 struct mlx5e_route_entry *r; 1626 struct mlx5e_route_key key; 1627 struct net_device *fib_dev; 1628 1629 fen_info = container_of(info, struct fib6_entry_notifier_info, info); 1630 fib_dev = fib6_info_nh_dev(fen_info->rt); 1631 if (fib_dev->netdev_ops != &mlx5e_netdev_ops || 1632 fen_info->rt->fib6_dst.plen != 128) 1633 return NULL; 1634 1635 fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC); 1636 if (!fib_work) 1637 return ERR_PTR(-ENOMEM); 1638 1639 memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr, 1640 sizeof(fen_info->rt->fib6_dst.addr)); 1641 key.ip_version = 6; 1642 1643 /* Can't fail after this point because releasing reference to r 1644 * requires obtaining sleeping mutex which we can't do in atomic 1645 * context. 
1646 */ 1647 r = mlx5e_route_lookup_for_update(encap, &key); 1648 if (!r) 1649 goto out; 1650 fib_work->r = r; 1651 dev_hold(ul_dev); 1652 1653 return fib_work; 1654 1655 out: 1656 kfree(fib_work); 1657 return NULL; 1658 } 1659 1660 static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr) 1661 { 1662 struct mlx5e_tc_fib_event_data *fib_work; 1663 struct fib_notifier_info *info = ptr; 1664 struct mlx5e_tc_tun_encap *encap; 1665 struct net_device *ul_dev; 1666 struct mlx5e_priv *priv; 1667 1668 encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb); 1669 priv = encap->priv; 1670 ul_dev = priv->netdev; 1671 priv = netdev_priv(ul_dev); 1672 1673 switch (event) { 1674 case FIB_EVENT_ENTRY_REPLACE: 1675 case FIB_EVENT_ENTRY_DEL: 1676 if (info->family == AF_INET) 1677 fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info); 1678 else if (info->family == AF_INET6) 1679 fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info); 1680 else 1681 return NOTIFY_DONE; 1682 1683 if (!IS_ERR_OR_NULL(fib_work)) { 1684 queue_work(priv->wq, &fib_work->work); 1685 } else if (IS_ERR(fib_work)) { 1686 NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work"); 1687 mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n", 1688 PTR_ERR(fib_work)); 1689 } 1690 1691 break; 1692 default: 1693 return NOTIFY_DONE; 1694 } 1695 1696 return NOTIFY_DONE; 1697 } 1698 1699 struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv) 1700 { 1701 struct mlx5e_tc_tun_encap *encap; 1702 int err; 1703 1704 encap = kvzalloc(sizeof(*encap), GFP_KERNEL); 1705 if (!encap) 1706 return ERR_PTR(-ENOMEM); 1707 1708 encap->priv = priv; 1709 encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event; 1710 spin_lock_init(&encap->route_lock); 1711 hash_init(encap->route_tbl); 1712 err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb, 1713 NULL, NULL); 1714 if (err) { 1715 kvfree(encap); 1716 return ERR_PTR(err); 1717 } 1718 1719 
return encap; 1720 } 1721 1722 void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap) 1723 { 1724 if (!encap) 1725 return; 1726 1727 unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb); 1728 flush_workqueue(encap->priv->wq); /* flush fib event works */ 1729 kvfree(encap); 1730 } 1731