// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies. */

#include <net/dst_metadata.h>
#include <linux/netdevice.h>
#include <linux/if_macvlan.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/rtnetlink.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#include "tc.h"
#include "neigh.h"
#include "en_rep.h"
#include "eswitch.h"
#include "lib/fs_chains.h"
#include "en/tc_ct.h"
#include "en/mapping.h"
#include "en/tc_tun.h"
#include "lib/port_tun.h"
#include "en/tc/sample.h"
#include "en_accel/ipsec_rxtx.h"
#include "en/tc/int_port.h"
#include "en/tc/act/act.h"

struct mlx5e_rep_indr_block_priv {
	struct net_device *netdev;
	struct mlx5e_rep_priv *rpriv;
	enum flow_block_binder_type binder_type;

	struct list_head list;
};

int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
				 struct mlx5e_encap_entry *e,
				 struct mlx5e_neigh *m_neigh,
				 struct net_device *neigh_dev)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
	struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy;
	struct mlx5e_neigh_hash_entry *nhe;
	int err;

	err = mlx5_tun_entropy_refcount_inc(tun_entropy, e->reformat_type);
	if (err)
		return err;

	mutex_lock(&rpriv->neigh_update.encap_lock);
	nhe = mlx5e_rep_neigh_entry_lookup(priv, m_neigh);
	if (!nhe) {
		err = mlx5e_rep_neigh_entry_create(priv, m_neigh, neigh_dev, &nhe);
		if (err) {
			mutex_unlock(&rpriv->neigh_update.encap_lock);
			mlx5_tun_entropy_refcount_dec(tun_entropy,
						      e->reformat_type);
			return err;
		}
	}

	e->nhe = nhe;
	spin_lock(&nhe->encap_list_lock);
	list_add_rcu(&e->encap_list, &nhe->encap_list);
	spin_unlock(&nhe->encap_list_lock);

	mutex_unlock(&rpriv->neigh_update.encap_lock);

	return 0;
}

void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
				  struct mlx5e_encap_entry *e)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
	struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy;

	if (!e->nhe)
		return;

	spin_lock(&e->nhe->encap_list_lock);
	list_del_rcu(&e->encap_list);
	spin_unlock(&e->nhe->encap_list_lock);

	mlx5e_rep_neigh_entry_release(e->nhe);
	e->nhe = NULL;
	mlx5_tun_entropy_refcount_dec(tun_entropy, e->reformat_type);
}
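/* Called on uplink neighbour state changes (under RTNL): revalidate the
 * encap entry against the new neighbour state and destination mac @ha, and
 * unoffload or reoffload the encap flows accordingly.
 */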
void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
			    struct mlx5e_encap_entry *e,
			    bool neigh_connected,
			    unsigned char ha[ETH_ALEN])
{
	struct ethhdr *eth = (struct ethhdr *)e->encap_header;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	bool encap_connected;
	LIST_HEAD(flow_list);

	ASSERT_RTNL();

	mutex_lock(&esw->offloads.encap_tbl_lock);
	encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID);
	if (encap_connected == neigh_connected && ether_addr_equal(e->h_dest, ha))
		goto unlock;

	mlx5e_take_all_encap_flows(e, &flow_list);

	if ((e->flags & MLX5_ENCAP_ENTRY_VALID) &&
	    (!neigh_connected || !ether_addr_equal(e->h_dest, ha)))
		mlx5e_tc_encap_flows_del(priv, e, &flow_list);

	if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) {
		struct net_device *route_dev;

		ether_addr_copy(e->h_dest, ha);
		ether_addr_copy(eth->h_dest, ha);
		/* Update the encap source mac, in case the flows were deleted
		 * because the encap source mac changed.
		 */
		route_dev = __dev_get_by_index(dev_net(priv->netdev), e->route_dev_ifindex);
		if (route_dev)
			ether_addr_copy(eth->h_source, route_dev->dev_addr);

		mlx5e_tc_encap_flows_add(priv, e, &flow_list);
	}
unlock:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	mlx5e_put_flow_list(priv, &flow_list);
}

static int
mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv,
			      struct flow_cls_offload *cls_flower, int flags)
{
	switch (cls_flower->command) {
	case FLOW_CLS_REPLACE:
		return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
					      flags);
	case FLOW_CLS_DESTROY:
		return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
					   flags);
	case FLOW_CLS_STATS:
		return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
					  flags);
	default:
		return -EOPNOTSUPP;
	}
}

static
int mlx5e_rep_setup_tc_cls_matchall(struct mlx5e_priv *priv,
				    struct tc_cls_matchall_offload *ma)
{
	switch (ma->command) {
	case TC_CLSMATCHALL_REPLACE:
		return mlx5e_tc_configure_matchall(priv, ma);
	case TC_CLSMATCHALL_DESTROY:
		return mlx5e_tc_delete_matchall(priv, ma);
	case TC_CLSMATCHALL_STATS:
		mlx5e_tc_stats_matchall(priv, ma);
		return 0;
	default:
		return -EOPNOTSUPP;
	}
}

static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
				 void *cb_priv)
{
	unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD);
	struct mlx5e_priv *priv = cb_priv;

	if (!priv->netdev || !netif_device_present(priv->netdev))
		return -EOPNOTSUPP;

	switch (type) {
	case TC_SETUP_CLSFLOWER:
		return mlx5e_rep_setup_tc_cls_flower(priv, type_data, flags);
	case TC_SETUP_CLSMATCHALL:
		return mlx5e_rep_setup_tc_cls_matchall(priv, type_data);
	default:
		return -EOPNOTSUPP;
	}
}
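/* Netfilter flowtable (FT) rules are replayed through the regular tc offload
 * path after being remapped onto the reserved ft chain; e.g. an FT rule on
 * chain 0, prio 0 is installed as chain mlx5_chains_get_nf_ft_chain(), prio 1.
 */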
static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data,
				 void *cb_priv)
{
	struct flow_cls_offload tmp, *f = type_data;
	struct mlx5e_priv *priv = cb_priv;
	struct mlx5_eswitch *esw;
	unsigned long flags;
	int err;

	flags = MLX5_TC_FLAG(INGRESS) |
		MLX5_TC_FLAG(ESW_OFFLOAD) |
		MLX5_TC_FLAG(FT_OFFLOAD);
	esw = priv->mdev->priv.eswitch;

	switch (type) {
	case TC_SETUP_CLSFLOWER:
		memcpy(&tmp, f, sizeof(*f));

		if (!mlx5_chains_prios_supported(esw_chains(esw)))
			return -EOPNOTSUPP;

		/* Re-use the tc offload path by moving the ft flow to the
		 * reserved ft chain.
		 *
		 * FT offload can use prio range [0, INT_MAX], so we normalize
		 * it to the range [1, mlx5_chains_get_prio_range(esw_chains(esw))]
		 * as with tc, where prio 0 isn't supported.
		 *
		 * We only support chain 0 of FT offload.
		 */
		if (tmp.common.prio >= mlx5_chains_get_prio_range(esw_chains(esw)))
			return -EOPNOTSUPP;
		if (tmp.common.chain_index != 0)
			return -EOPNOTSUPP;

		tmp.common.chain_index = mlx5_chains_get_nf_ft_chain(esw_chains(esw));
		tmp.common.prio++;
		err = mlx5e_rep_setup_tc_cls_flower(priv, &tmp, flags);
		memcpy(&f->stats, &tmp.stats, sizeof(f->stats));
		return err;
	default:
		return -EOPNOTSUPP;
	}
}

static LIST_HEAD(mlx5e_rep_block_tc_cb_list);
static LIST_HEAD(mlx5e_rep_block_ft_cb_list);

int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
		       void *type_data)
{
	struct mlx5e_priv *priv = netdev_priv(dev);
	struct flow_block_offload *f = type_data;

	f->unlocked_driver_cb = true;

	switch (type) {
	case TC_SETUP_BLOCK:
		return flow_block_cb_setup_simple(type_data,
						  &mlx5e_rep_block_tc_cb_list,
						  mlx5e_rep_setup_tc_cb,
						  priv, priv, true);
	case TC_SETUP_FT:
		return flow_block_cb_setup_simple(type_data,
						  &mlx5e_rep_block_ft_cb_list,
						  mlx5e_rep_setup_ft_cb,
						  priv, priv, true);
	default:
		return -EOPNOTSUPP;
	}
}

int mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv)
{
	struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
	int err;

	mutex_init(&uplink_priv->unready_flows_lock);
	INIT_LIST_HEAD(&uplink_priv->unready_flows);

	/* init shared tc flow table */
	err = mlx5e_tc_esw_init(uplink_priv);
	return err;
}

void mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv)
{
	/* delete shared tc flow table */
	mlx5e_tc_esw_cleanup(&rpriv->uplink_priv);
	mutex_destroy(&rpriv->uplink_priv.unready_flows_lock);
}

void mlx5e_rep_tc_enable(struct mlx5e_priv *priv)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;

	INIT_WORK(&rpriv->uplink_priv.reoffload_flows_work,
		  mlx5e_tc_reoffload_flows_work);
}

void mlx5e_rep_tc_disable(struct mlx5e_priv *priv)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;

	cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work);
}

int mlx5e_rep_tc_event_port_affinity(struct mlx5e_priv *priv)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;

	queue_work(priv->wq, &rpriv->uplink_priv.reoffload_flows_work);

	return NOTIFY_OK;
}

static struct mlx5e_rep_indr_block_priv *
mlx5e_rep_indr_block_priv_lookup(struct mlx5e_rep_priv *rpriv,
				 struct net_device *netdev,
				 enum flow_block_binder_type binder_type)
{
	struct mlx5e_rep_indr_block_priv *cb_priv;

	list_for_each_entry(cb_priv,
			    &rpriv->uplink_priv.tc_indr_block_priv_list,
			    list)
		if (cb_priv->netdev == netdev &&
		    cb_priv->binder_type == binder_type)
			return cb_priv;

	return NULL;
}
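/* Apply a flower command that arrived via an indirect block (e.g. on a
 * tunnel, vlan, macvlan or OVS internal-port device) on behalf of the
 * uplink representor.
 */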
static int
mlx5e_rep_indr_offload(struct net_device *netdev,
		       struct flow_cls_offload *flower,
		       struct mlx5e_rep_indr_block_priv *indr_priv,
		       unsigned long flags)
{
	struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev);
	int err = 0;

	if (!netif_device_present(indr_priv->rpriv->netdev))
		return -EOPNOTSUPP;

	switch (flower->command) {
	case FLOW_CLS_REPLACE:
		err = mlx5e_configure_flower(netdev, priv, flower, flags);
		break;
	case FLOW_CLS_DESTROY:
		err = mlx5e_delete_flower(netdev, priv, flower, flags);
		break;
	case FLOW_CLS_STATS:
		err = mlx5e_stats_flower(netdev, priv, flower, flags);
		break;
	default:
		err = -EOPNOTSUPP;
	}

	return err;
}

static int mlx5e_rep_indr_setup_tc_cb(enum tc_setup_type type,
				      void *type_data, void *indr_priv)
{
	unsigned long flags = MLX5_TC_FLAG(ESW_OFFLOAD);
	struct mlx5e_rep_indr_block_priv *priv = indr_priv;

	flags |= (priv->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) ?
		MLX5_TC_FLAG(EGRESS) :
		MLX5_TC_FLAG(INGRESS);

	switch (type) {
	case TC_SETUP_CLSFLOWER:
		return mlx5e_rep_indr_offload(priv->netdev, type_data, priv,
					      flags);
	default:
		return -EOPNOTSUPP;
	}
}

static int mlx5e_rep_indr_setup_ft_cb(enum tc_setup_type type,
				      void *type_data, void *indr_priv)
{
	struct mlx5e_rep_indr_block_priv *priv = indr_priv;
	struct flow_cls_offload *f = type_data;
	struct flow_cls_offload tmp;
	struct mlx5e_priv *mpriv;
	struct mlx5_eswitch *esw;
	unsigned long flags;
	int err;

	mpriv = netdev_priv(priv->rpriv->netdev);
	esw = mpriv->mdev->priv.eswitch;

	flags = MLX5_TC_FLAG(EGRESS) |
		MLX5_TC_FLAG(ESW_OFFLOAD) |
		MLX5_TC_FLAG(FT_OFFLOAD);

	switch (type) {
	case TC_SETUP_CLSFLOWER:
		memcpy(&tmp, f, sizeof(*f));

		/* Re-use the tc offload path by moving the ft flow to the
		 * reserved ft chain.
		 *
		 * FT offload can use prio range [0, INT_MAX], so we normalize
		 * it to the range [1, mlx5_chains_get_prio_range(esw_chains(esw))]
		 * as with tc, where prio 0 isn't supported.
		 *
		 * We only support chain 0 of FT offload.
		 */
		if (!mlx5_chains_prios_supported(esw_chains(esw)) ||
		    tmp.common.prio >= mlx5_chains_get_prio_range(esw_chains(esw)) ||
		    tmp.common.chain_index)
			return -EOPNOTSUPP;

		tmp.common.chain_index = mlx5_chains_get_nf_ft_chain(esw_chains(esw));
		tmp.common.prio++;
		err = mlx5e_rep_indr_offload(priv->netdev, &tmp, priv, flags);
		memcpy(&f->stats, &tmp.stats, sizeof(f->stats));
		return err;
	default:
		return -EOPNOTSUPP;
	}
}

static void mlx5e_rep_indr_block_unbind(void *cb_priv)
{
	struct mlx5e_rep_indr_block_priv *indr_priv = cb_priv;

	list_del(&indr_priv->list);
	kfree(indr_priv);
}

static LIST_HEAD(mlx5e_block_cb_list);

static bool mlx5e_rep_macvlan_mode_supported(const struct net_device *dev)
{
	struct macvlan_dev *macvlan = netdev_priv(dev);

	return macvlan->mode == MACVLAN_MODE_PASSTHRU;
}
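/* Bind/unbind an indirect tc block for a non-representor device. Ingress
 * blocks are accepted for offload-capable tunnel devices and for vlan or
 * passthru-macvlan devices stacked on the uplink; egress blocks only for
 * OVS internal ports, and only when the eswitch supports int-port offload.
 */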
static int
mlx5e_rep_indr_setup_block(struct net_device *netdev, struct Qdisc *sch,
			   struct mlx5e_rep_priv *rpriv,
			   struct flow_block_offload *f,
			   flow_setup_cb_t *setup_cb,
			   void *data,
			   void (*cleanup)(struct flow_block_cb *block_cb))
{
	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	bool is_ovs_int_port = netif_is_ovs_master(netdev);
	struct mlx5e_rep_indr_block_priv *indr_priv;
	struct flow_block_cb *block_cb;

	if (!mlx5e_tc_tun_device_to_offload(priv, netdev) &&
	    !(is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev) &&
	    !is_ovs_int_port) {
		if (!(netif_is_macvlan(netdev) && macvlan_dev_real_dev(netdev) == rpriv->netdev))
			return -EOPNOTSUPP;
		if (!mlx5e_rep_macvlan_mode_supported(netdev)) {
			netdev_warn(netdev, "Offloading ingress filter is supported only with macvlan passthru mode");
			return -EOPNOTSUPP;
		}
	}

	if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
	    f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
		return -EOPNOTSUPP;

	if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS && !is_ovs_int_port)
		return -EOPNOTSUPP;

	if (is_ovs_int_port && !mlx5e_tc_int_port_supported(esw))
		return -EOPNOTSUPP;

	f->unlocked_driver_cb = true;
	f->driver_block_list = &mlx5e_block_cb_list;

	switch (f->command) {
	case FLOW_BLOCK_BIND:
		indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev, f->binder_type);
		if (indr_priv)
			return -EEXIST;

		indr_priv = kmalloc(sizeof(*indr_priv), GFP_KERNEL);
		if (!indr_priv)
			return -ENOMEM;

		indr_priv->netdev = netdev;
		indr_priv->rpriv = rpriv;
		indr_priv->binder_type = f->binder_type;
		list_add(&indr_priv->list,
			 &rpriv->uplink_priv.tc_indr_block_priv_list);

		block_cb = flow_indr_block_cb_alloc(setup_cb, indr_priv, indr_priv,
						    mlx5e_rep_indr_block_unbind,
						    f, netdev, sch, data, rpriv,
						    cleanup);
		if (IS_ERR(block_cb)) {
			list_del(&indr_priv->list);
			kfree(indr_priv);
			return PTR_ERR(block_cb);
		}
		flow_block_cb_add(block_cb, f);
		list_add_tail(&block_cb->driver_list, &mlx5e_block_cb_list);

		return 0;
	case FLOW_BLOCK_UNBIND:
		indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev, f->binder_type);
		if (!indr_priv)
			return -ENOENT;

		block_cb = flow_block_cb_lookup(f->block, setup_cb, indr_priv);
		if (!block_cb)
			return -ENOENT;

		flow_indr_block_cb_remove(block_cb, f);
		list_del(&block_cb->driver_list);
		return 0;
	default:
		return -EOPNOTSUPP;
	}
}
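/* Standalone tc action offload (FLOW_ACT_REPLACE/DESTROY/STATS): dispatch
 * the action to the matching mlx5e_tc_act handler in the FDB or NIC
 * namespace, depending on the current eswitch mode.
 */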
static int
mlx5e_rep_indr_replace_act(struct mlx5e_rep_priv *rpriv,
			   struct flow_offload_action *fl_act)
{
	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	enum mlx5_flow_namespace_type ns_type;
	struct flow_action_entry *action;
	struct mlx5e_tc_act *act;
	bool add = false;
	int i;

	/* There is currently no use case for more than one action (e.g. pedit).
	 * When there is, cleaning up multiple actions on error will need to be
	 * handled here.
	 */
	if (!flow_offload_has_one_action(&fl_act->action))
		return -EOPNOTSUPP;

	if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
		ns_type = MLX5_FLOW_NAMESPACE_FDB;
	else
		ns_type = MLX5_FLOW_NAMESPACE_KERNEL;

	flow_action_for_each(i, action, &fl_act->action) {
		act = mlx5e_tc_act_get(action->id, ns_type);
		if (!act)
			continue;

		if (!act->offload_action)
			continue;

		if (!act->offload_action(priv, fl_act, action))
			add = true;
	}

	return add ? 0 : -EOPNOTSUPP;
}

static int
mlx5e_rep_indr_destroy_act(struct mlx5e_rep_priv *rpriv,
			   struct flow_offload_action *fl_act)
{
	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	enum mlx5_flow_namespace_type ns_type;
	struct mlx5e_tc_act *act;

	if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
		ns_type = MLX5_FLOW_NAMESPACE_FDB;
	else
		ns_type = MLX5_FLOW_NAMESPACE_KERNEL;

	act = mlx5e_tc_act_get(fl_act->id, ns_type);
	if (!act || !act->destroy_action)
		return -EOPNOTSUPP;

	return act->destroy_action(priv, fl_act);
}

static int
mlx5e_rep_indr_stats_act(struct mlx5e_rep_priv *rpriv,
			 struct flow_offload_action *fl_act)
{
	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	enum mlx5_flow_namespace_type ns_type;
	struct mlx5e_tc_act *act;

	if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
		ns_type = MLX5_FLOW_NAMESPACE_FDB;
	else
		ns_type = MLX5_FLOW_NAMESPACE_KERNEL;

	act = mlx5e_tc_act_get(fl_act->id, ns_type);
	if (!act || !act->stats_action)
		return -EOPNOTSUPP;

	return act->stats_action(priv, fl_act);
}

static int
mlx5e_rep_indr_setup_act(struct mlx5e_rep_priv *rpriv,
			 struct flow_offload_action *fl_act)
{
	switch (fl_act->command) {
	case FLOW_ACT_REPLACE:
		return mlx5e_rep_indr_replace_act(rpriv, fl_act);
	case FLOW_ACT_DESTROY:
		return mlx5e_rep_indr_destroy_act(rpriv, fl_act);
	case FLOW_ACT_STATS:
		return mlx5e_rep_indr_stats_act(rpriv, fl_act);
	default:
		return -EOPNOTSUPP;
	}
}

static int
mlx5e_rep_indr_no_dev_setup(struct mlx5e_rep_priv *rpriv,
			    enum tc_setup_type type,
			    void *data)
{
	if (!data)
		return -EOPNOTSUPP;

	switch (type) {
	case TC_SETUP_ACT:
		return mlx5e_rep_indr_setup_act(rpriv, data);
	default:
		return -EOPNOTSUPP;
	}
}

static
int mlx5e_rep_indr_setup_cb(struct net_device *netdev, struct Qdisc *sch, void *cb_priv,
			    enum tc_setup_type type, void *type_data,
			    void *data,
			    void (*cleanup)(struct flow_block_cb *block_cb))
{
	if (!netdev)
		return mlx5e_rep_indr_no_dev_setup(cb_priv, type, data);

	switch (type) {
	case TC_SETUP_BLOCK:
		return mlx5e_rep_indr_setup_block(netdev, sch, cb_priv, type_data,
						  mlx5e_rep_indr_setup_tc_cb,
						  data, cleanup);
	case TC_SETUP_FT:
		return mlx5e_rep_indr_setup_block(netdev, sch, cb_priv, type_data,
						  mlx5e_rep_indr_setup_ft_cb,
						  data, cleanup);
	default:
		return -EOPNOTSUPP;
	}
}

int mlx5e_rep_tc_netdevice_event_register(struct mlx5e_rep_priv *rpriv)
{
	struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;

	/* init indirect block notifications */
	INIT_LIST_HEAD(&uplink_priv->tc_indr_block_priv_list);

	return flow_indr_dev_register(mlx5e_rep_indr_setup_cb, rpriv);
}

void mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv)
{
	flow_indr_dev_unregister(mlx5e_rep_indr_setup_cb, rpriv,
				 mlx5e_rep_indr_block_unbind);
}
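/* Reverse of the tunnel-id mapping done at offload time: look up the tunnel
 * match key (and encap options) by the id restored from the packet's reg_c1
 * metadata, rebuild the tunnel metadata dst on the skb and steer it to the
 * tunnel device.
 */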
static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb,
				 struct mlx5e_tc_update_priv *tc_priv,
				 u32 tunnel_id)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct tunnel_match_enc_opts enc_opts = {};
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct metadata_dst *tun_dst;
	struct tunnel_match_key key;
	u32 tun_id, enc_opts_id;
	struct net_device *dev;
	int err;

	enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK;
	tun_id = tunnel_id >> ENC_OPTS_BITS;

	if (!tun_id)
		return true;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;

	err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key);
	if (err) {
		netdev_dbg(priv->netdev,
			   "Couldn't find tunnel for tun_id: %d, err: %d\n",
			   tun_id, err);
		return false;
	}

	if (enc_opts_id) {
		err = mapping_find(uplink_priv->tunnel_enc_opts_mapping,
				   enc_opts_id, &enc_opts);
		if (err) {
			netdev_dbg(priv->netdev,
				   "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n",
				   enc_opts_id, err);
			return false;
		}
	}

	if (key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
		tun_dst = __ip_tun_set_dst(key.enc_ipv4.src, key.enc_ipv4.dst,
					   key.enc_ip.tos, key.enc_ip.ttl,
					   key.enc_tp.dst, TUNNEL_KEY,
					   key32_to_tunnel_id(key.enc_key_id.keyid),
					   enc_opts.key.len);
	} else if (key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
		tun_dst = __ipv6_tun_set_dst(&key.enc_ipv6.src, &key.enc_ipv6.dst,
					     key.enc_ip.tos, key.enc_ip.ttl,
					     key.enc_tp.dst, 0, TUNNEL_KEY,
					     key32_to_tunnel_id(key.enc_key_id.keyid),
					     enc_opts.key.len);
	} else {
		netdev_dbg(priv->netdev,
			   "Couldn't restore tunnel, unsupported addr_type: %d\n",
			   key.enc_control.addr_type);
		return false;
	}

	if (!tun_dst) {
		netdev_dbg(priv->netdev, "Couldn't restore tunnel, no tun_dst\n");
		return false;
	}

	tun_dst->u.tun_info.key.tp_src = key.enc_tp.src;

	if (enc_opts.key.len)
		ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
					enc_opts.key.data,
					enc_opts.key.len,
					enc_opts.key.dst_opt_type);

	skb_dst_set(skb, (struct dst_entry *)tun_dst);
	dev = dev_get_by_index(&init_net, key.filter_ifindex);
	if (!dev) {
		netdev_dbg(priv->netdev,
			   "Couldn't find tunnel device with ifindex: %d\n",
			   key.filter_ifindex);
		return false;
	}

	/* Set fwd_dev so we do dev_put() after datapath */
	tc_priv->fwd_dev = dev;

	skb->dev = dev;

	return true;
}

static bool mlx5e_restore_skb_chain(struct sk_buff *skb, u32 chain, u32 reg_c1,
				    struct mlx5e_tc_update_priv *tc_priv)
{
	struct mlx5e_priv *priv = netdev_priv(skb->dev);
	u32 tunnel_id = (reg_c1 >> ESW_TUN_OFFSET) & TUNNEL_ID_MASK;

#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
	if (chain) {
		struct mlx5_rep_uplink_priv *uplink_priv;
		struct mlx5e_rep_priv *uplink_rpriv;
		struct tc_skb_ext *tc_skb_ext;
		struct mlx5_eswitch *esw;
		u32 zone_restore_id;

		tc_skb_ext = tc_skb_ext_alloc(skb);
		if (!tc_skb_ext) {
			WARN_ON(1);
			return false;
		}
		tc_skb_ext->chain = chain;
		zone_restore_id = reg_c1 & ESW_ZONE_ID_MASK;
		esw = priv->mdev->priv.eswitch;
		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
		uplink_priv = &uplink_rpriv->uplink_priv;
		if (!mlx5e_tc_ct_restore_flow(uplink_priv->ct_priv, skb,
					      zone_restore_id))
			return false;
	}
#endif /* CONFIG_NET_TC_SKB_EXT */

	return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id);
}
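/* Drop the reference held on tc_priv->fwd_dev, taken when the skb was
 * retargeted to a tunnel or int-port device, once the skb has left the
 * driver's datapath.
 */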
static void mlx5_rep_tc_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv)
{
	if (tc_priv->fwd_dev)
		dev_put(tc_priv->fwd_dev);
}

static void mlx5e_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *skb,
				     struct mlx5_mapped_obj *mapped_obj,
				     struct mlx5e_tc_update_priv *tc_priv)
{
	if (!mlx5e_restore_tunnel(priv, skb, tc_priv, mapped_obj->sample.tunnel_id)) {
		netdev_dbg(priv->netdev,
			   "Failed to restore tunnel info for sampled packet\n");
		return;
	}
	mlx5e_tc_sample_skb(skb, mapped_obj);
	mlx5_rep_tc_post_napi_receive(tc_priv);
}

static bool mlx5e_restore_skb_int_port(struct mlx5e_priv *priv, struct sk_buff *skb,
				       struct mlx5_mapped_obj *mapped_obj,
				       struct mlx5e_tc_update_priv *tc_priv,
				       bool *forward_tx,
				       u32 reg_c1)
{
	u32 tunnel_id = (reg_c1 >> ESW_TUN_OFFSET) & TUNNEL_ID_MASK;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;

	/* Tunnel restore takes precedence over int port restore */
	if (tunnel_id)
		return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id);

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;

	if (mlx5e_tc_int_port_dev_fwd(uplink_priv->int_port_priv, skb,
				      mapped_obj->int_port_metadata, forward_tx)) {
		/* Set fwd_dev for future dev_put */
		tc_priv->fwd_dev = skb->dev;

		return true;
	}

	return false;
}

void mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq,
			  struct sk_buff *skb)
{
	u32 reg_c1 = be32_to_cpu(cqe->ft_metadata);
	struct mlx5e_tc_update_priv tc_priv = {};
	struct mlx5_mapped_obj mapped_obj;
	struct mlx5_eswitch *esw;
	bool forward_tx = false;
	struct mlx5e_priv *priv;
	u32 reg_c0;
	int err;

	reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK);
	if (!reg_c0 || reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG)
		goto forward;

	/* If reg_c0 is not equal to the default flow tag then skb->mark
	 * is not supported and must be reset back to 0.
	 */
	skb->mark = 0;

	priv = netdev_priv(skb->dev);
	esw = priv->mdev->priv.eswitch;
	err = mapping_find(esw->offloads.reg_c0_obj_pool, reg_c0, &mapped_obj);
	if (err) {
		netdev_dbg(priv->netdev,
			   "Couldn't find mapped object for reg_c0: %d, err: %d\n",
			   reg_c0, err);
		goto free_skb;
	}

	if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) {
		if (!mlx5e_restore_skb_chain(skb, mapped_obj.chain, reg_c1, &tc_priv) &&
		    !mlx5_ipsec_is_rx_flow(cqe))
			goto free_skb;
	} else if (mapped_obj.type == MLX5_MAPPED_OBJ_SAMPLE) {
		mlx5e_restore_skb_sample(priv, skb, &mapped_obj, &tc_priv);
		goto free_skb;
	} else if (mapped_obj.type == MLX5_MAPPED_OBJ_INT_PORT_METADATA) {
		if (!mlx5e_restore_skb_int_port(priv, skb, &mapped_obj, &tc_priv,
						&forward_tx, reg_c1))
			goto free_skb;
	} else {
		netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type);
		goto free_skb;
	}

forward:
	if (forward_tx)
		dev_queue_xmit(skb);
	else
		napi_gro_receive(rq->cq.napi, skb);

	mlx5_rep_tc_post_napi_receive(&tc_priv);

	return;

free_skb:
	dev_kfree_skb_any(skb);
}