// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.

#include <linux/if_macvlan.h>
#include <linux/if_vlan.h>
#include <net/bareudp.h>
#include <net/bonding.h>
#include "act.h"
#include "vlan.h"
#include "en/tc_tun_encap.h"
#include "en/tc_priv.h"
#include "en_rep.h"
#include "lag/lag.h"

/* True when the mirred destination is this VF representor's own netdev,
 * i.e. the rule would forward a VF rep's traffic back to itself.
 */
static bool
same_vf_reps(struct mlx5e_priv *priv, struct net_device *out_dev)
{
	return mlx5e_eswitch_vf_rep(priv->netdev) &&
	       priv->netdev == out_dev;
}

/* Validate uplink-to-uplink forwarding.
 *
 * Returns 0 when the rule is not uplink-to-uplink, or when it is and the
 * device supports it (hairpin back out the *same* uplink only); otherwise
 * -EOPNOTSUPP with an extack message.
 */
static int
verify_uplink_forwarding(struct mlx5e_priv *priv,
			 struct mlx5_flow_attr *attr,
			 struct net_device *out_dev,
			 struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_rep_priv *rep_priv;

	/* Forwarding non encapsulated traffic between
	 * uplink ports is allowed only if
	 * termination_table_raw_traffic cap is set.
	 *
	 * Input vport was stored attr->in_rep.
	 * In LAG case, *priv* is the private data of
	 * uplink which may be not the input vport.
	 */
	rep_priv = mlx5e_rep_to_rep_priv(attr->esw_attr->in_rep);

	/* Only the uplink->uplink case needs further checks. */
	if (!(mlx5e_eswitch_uplink_rep(rep_priv->netdev) &&
	      mlx5e_eswitch_uplink_rep(out_dev)))
		return 0;

	if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev,
					termination_table_raw_traffic)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "devices are both uplink, can't offload forwarding");
		return -EOPNOTSUPP;
	} else if (out_dev != rep_priv->netdev) {
		/* Cap present, but forwarding to a *different* uplink is
		 * still unsupported.
		 */
		NL_SET_ERR_MSG_MOD(extack,
				   "devices are not the same uplink, can't offload forwarding");
		return -EOPNOTSUPP;
	}
	return 0;
}

/* Check whether out_dev's ifindex already appears among the first if_count
 * entries of ifindexes (destinations parsed so far for this flow).
 * Reports via extack and dmesg when a duplicate is found.
 */
static bool
is_duplicated_output_device(struct net_device *dev,
			    struct net_device *out_dev,
			    int *ifindexes, int if_count,
			    struct netlink_ext_ack *extack)
{
	int i;

	for (i = 0; i < if_count; i++) {
		if (ifindexes[i] == out_dev->ifindex) {
			NL_SET_ERR_MSG_MOD(extack, "can't duplicate output to same device");
			netdev_err(dev, "can't duplicate output to same device: %s\n",
				   out_dev->name);
			return true;
		}
	}

	return false;
}

/* Resolve the actual FDB destination device for out_dev:
 * - if out_dev is the LAG master stacked on top of our uplink, use the
 *   uplink itself;
 * - if out_dev is some other bond master, use its currently active slave,
 *   but only if that slave is an eswitch rep on the same HW as uplink_dev;
 * - otherwise out_dev is used as-is.
 * Returns NULL when no valid destination can be derived.
 */
static struct net_device *
get_fdb_out_dev(struct net_device *uplink_dev, struct net_device *out_dev)
{
	struct net_device *fdb_out_dev = out_dev;
	struct net_device *uplink_upper;

	/* RCU protects the upper-dev and active-slave lookups. */
	rcu_read_lock();
	uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev);
	if (uplink_upper && netif_is_lag_master(uplink_upper) &&
	    uplink_upper == out_dev) {
		fdb_out_dev = uplink_dev;
	} else if (netif_is_lag_master(out_dev)) {
		fdb_out_dev = bond_option_active_slave_get_rcu(netdev_priv(out_dev));
		if (fdb_out_dev &&
		    (!mlx5e_eswitch_rep(fdb_out_dev) ||
		     !netdev_port_same_parent_id(fdb_out_dev, uplink_dev)))
			fdb_out_dev = NULL;
	}
	rcu_read_unlock();
	return fdb_out_dev;
}

/* .can_offload callback: decide whether this mirred/redirect action can be
 * offloaded at all. Returns false (usually with an extack message) for the
 * unsupported combinations; ordering of the checks matters, e.g. the
 * encap/same-parent/ovs-master test must come before the filter_dev test.
 */
static bool
tc_act_can_offload_mirred(struct mlx5e_tc_act_parse_state *parse_state,
			  const struct flow_action_entry *act,
			  int act_index,
			  struct mlx5_flow_attr *attr)
{
	struct netlink_ext_ack *extack = parse_state->extack;
	struct mlx5e_tc_flow *flow = parse_state->flow;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct net_device *out_dev = act->dev;
	struct mlx5e_priv *priv = flow->priv;
	struct mlx5_esw_flow_attr *esw_attr;

	parse_attr = attr->parse_attr;
	esw_attr = attr->esw_attr;

	if (!out_dev) {
		/* out_dev is NULL when filters with
		 * non-existing mirred device are replayed to
		 * the driver.
		 */
		return false;
	}

	/* MPLS push/pop has device-type constraints. */
	if (parse_state->mpls_push && !netif_is_bareudp(out_dev)) {
		NL_SET_ERR_MSG_MOD(extack, "mpls is supported only through a bareudp device");
		return false;
	}

	if (parse_state->eth_pop && !parse_state->mpls_push) {
		NL_SET_ERR_MSG_MOD(extack, "vlan pop eth is supported only with mpls push");
		return false;
	}

	if (flow_flag_test(parse_state->flow, L3_TO_L2_DECAP) && !parse_state->eth_push) {
		NL_SET_ERR_MSG_MOD(extack, "mpls pop is only supported with vlan eth push");
		return false;
	}

	if (mlx5e_is_ft_flow(flow) && out_dev == priv->netdev) {
		/* Ignore forward to self rules generated
		 * by adding both mlx5 devs to the flow table
		 * block on a normal nft offload setup.
		 */
		return false;
	}

	/* HW limit on the number of forward destinations per rule. */
	if (esw_attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) {
		NL_SET_ERR_MSG_MOD(extack,
				   "can't support more output ports, can't offload forwarding");
		netdev_warn(priv->netdev,
			    "can't support more than %d output ports, can't offload forwarding\n",
			    esw_attr->out_count);
		return false;
	}

	/* Offloadable: tunnel encap destination, a device on the same
	 * switch HW, or an OVS internal port master.
	 */
	if (parse_state->encap ||
	    netdev_port_same_parent_id(priv->netdev, out_dev) ||
	    netif_is_ovs_master(out_dev))
		return true;

	if (parse_attr->filter_dev != priv->netdev) {
		/* All mlx5 devices are called to configure
		 * high level device filters. Therefore, the
		 * *attempt* to install a filter on invalid
		 * eswitch should not trigger an explicit error
		 */
		return false;
	}

	NL_SET_ERR_MSG_MOD(extack, "devices are not on same switch HW, can't offload forwarding");

	return false;
}

/* Record an encap (tunnel_key set + mirred) destination: stash the target
 * ifindex, a private copy of the tunnel metadata and, when applicable, the
 * pending MPLS push info for this destination slot. Consumes the pending
 * encap/mpls_push parse state. Returns 0 or -ENOMEM.
 */
static int
parse_mirred_encap(struct mlx5e_tc_act_parse_state *parse_state,
		   const struct flow_action_entry *act,
		   struct mlx5_flow_attr *attr)
{
	struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
	struct net_device *out_dev = act->dev;

	parse_attr->mirred_ifindex[esw_attr->out_count] = out_dev->ifindex;
	parse_attr->tun_info[esw_attr->out_count] =
		mlx5e_dup_tun_info(parse_state->tun_info);

	if (!parse_attr->tun_info[esw_attr->out_count])
		return -ENOMEM;

	parse_state->encap = false;

	if (parse_state->mpls_push) {
		memcpy(&parse_attr->mpls_info[esw_attr->out_count],
		       &parse_state->mpls_info, sizeof(parse_state->mpls_info));
		parse_state->mpls_push = false;
	}
	esw_attr->dests[esw_attr->out_count].flags |= MLX5_ESW_DEST_ENCAP;
	esw_attr->out_count++;
	/* attr->dests[].rep is resolved when we handle encap */

	return 0;
}

/* Parse a plain (non-encap) mirred destination on the same switch HW:
 * reject duplicates, resolve bond/vlan/macvlan stacking down to an eswitch
 * rep, validate uplink forwarding, and fill the next esw_attr->dests slot.
 * Returns 0 on success or a negative errno.
 */
static int
parse_mirred(struct mlx5e_tc_act_parse_state *parse_state,
	     const struct flow_action_entry *act,
	     struct mlx5e_priv *priv,
	     struct mlx5_flow_attr *attr)
{
	struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
	struct netlink_ext_ack *extack = parse_state->extack;
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct net_device *out_dev = act->dev;
	struct net_device *uplink_dev;
	struct mlx5e_priv *out_priv;
	struct mlx5_eswitch *esw;
	bool is_uplink_rep;
	int *ifindexes;
	int if_count;
	int err;

	esw = priv->mdev->priv.eswitch;
	uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
	ifindexes = parse_state->ifindexes;
	if_count = parse_state->if_count;

	if (is_duplicated_output_device(priv->netdev, out_dev, ifindexes, if_count, extack))
		return -EOPNOTSUPP;

	/* Remember this destination for duplicate detection of later acts. */
	parse_state->ifindexes[if_count] = out_dev->ifindex;
	parse_state->if_count++;
	/* Checked on the original out_dev, before bond resolution below. */
	is_uplink_rep = mlx5e_eswitch_uplink_rep(out_dev);
	err = mlx5_lag_do_mirred(priv->mdev, out_dev);
	if (err)
		return err;

	out_dev = get_fdb_out_dev(uplink_dev, out_dev);
	if (!out_dev)
		return -ENODEV;

	if (is_vlan_dev(out_dev)) {
		/* Output through a vlan device => push vlan on egress;
		 * out_dev is advanced to the vlan's real device.
		 */
		err = mlx5e_tc_act_vlan_add_push_action(priv, attr, &out_dev, extack);
		if (err)
			return err;
	}

	if (is_vlan_dev(parse_attr->filter_dev)) {
		/* Filter installed on a vlan device => pop vlan on ingress. */
		err = mlx5e_tc_act_vlan_add_pop_action(priv, attr, extack);
		if (err)
			return err;
	}

	if (netif_is_macvlan(out_dev))
		out_dev = macvlan_dev_real_dev(out_dev);

	err = verify_uplink_forwarding(priv, attr, out_dev, extack);
	if (err)
		return err;

	if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "devices are not on same switch HW, can't offload forwarding");
		return -EOPNOTSUPP;
	}

	if (same_vf_reps(priv, out_dev)) {
		NL_SET_ERR_MSG_MOD(extack, "can't forward from a VF to itself");
		return -EOPNOTSUPP;
	}

	out_priv = netdev_priv(out_dev);
	rpriv = out_priv->ppriv;
	esw_attr->dests[esw_attr->out_count].rep = rpriv->rep;
	esw_attr->dests[esw_attr->out_count].mdev = out_priv->mdev;

	/* If output device is bond master then rules are not explicit
	 * so we don't attempt to count them.
	 */
	if (is_uplink_rep && MLX5_CAP_PORT_SELECTION(priv->mdev, port_select_flow_table) &&
	    MLX5_CAP_GEN(priv->mdev, create_lag_when_not_master_up))
		attr->lag.count = true;

	esw_attr->out_count++;

	return 0;
}

/* Parse a mirred destination that is an OVS internal port master: set up
 * forwarding to the internal port (egress direction) for the current
 * destination slot. Returns 0 or a negative errno.
 */
static int
parse_mirred_ovs_master(struct mlx5e_tc_act_parse_state *parse_state,
			const struct flow_action_entry *act,
			struct mlx5e_priv *priv,
			struct mlx5_flow_attr *attr)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
	struct net_device *out_dev = act->dev;
	int err;

	err = mlx5e_set_fwd_to_int_port_actions(priv, attr, out_dev->ifindex,
						MLX5E_TC_INT_PORT_EGRESS,
						&attr->action, esw_attr->out_count);
	if (err)
		return err;

	esw_attr->out_count++;
	return 0;
}

/* .parse_action callback: dispatch to the encap / same-HW / ovs-master
 * parser (any other destination fails with -EOPNOTSUPP; can_offload has
 * already screened those) and set the FWD_DEST action flag on success.
 */
static int
tc_act_parse_mirred(struct mlx5e_tc_act_parse_state *parse_state,
		    const struct flow_action_entry *act,
		    struct mlx5e_priv *priv,
		    struct mlx5_flow_attr *attr)
{
	struct net_device *out_dev = act->dev;
	int err = -EOPNOTSUPP;

	if (parse_state->encap)
		err = parse_mirred_encap(parse_state, act, attr);
	else if (netdev_port_same_parent_id(priv->netdev, out_dev))
		err = parse_mirred(parse_state, act, priv, attr);
	else if (netif_is_ovs_master(out_dev))
		err = parse_mirred_ovs_master(parse_state, act, priv, attr);

	if (err)
		return err;

	attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;

	return 0;
}

/* TC action ops for mirred (redirect/mirror) actions. */
struct mlx5e_tc_act mlx5e_tc_act_mirred = {
	.can_offload = tc_act_can_offload_mirred,
	.parse_action = tc_act_parse_mirred,
};