1 /*
2 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33 #include <net/flow_dissector.h>
34 #include <net/flow_offload.h>
35 #include <net/sch_generic.h>
36 #include <net/pkt_cls.h>
37 #include <linux/mlx5/fs.h>
38 #include <linux/mlx5/device.h>
39 #include <linux/rhashtable.h>
40 #include <linux/refcount.h>
41 #include <linux/completion.h>
42 #include <net/arp.h>
43 #include <net/ipv6_stubs.h>
44 #include <net/bareudp.h>
45 #include <net/bonding.h>
46 #include <net/dst_metadata.h>
47 #include "devlink.h"
48 #include "en.h"
49 #include "en/tc/post_act.h"
50 #include "en/tc/act_stats.h"
51 #include "en_rep.h"
52 #include "en/rep/tc.h"
53 #include "en/rep/neigh.h"
54 #include "en_tc.h"
55 #include "eswitch.h"
56 #include "fs_core.h"
57 #include "en/port.h"
58 #include "en/tc_tun.h"
59 #include "en/mapping.h"
60 #include "en/tc_ct.h"
61 #include "en/mod_hdr.h"
62 #include "en/tc_tun_encap.h"
63 #include "en/tc/sample.h"
64 #include "en/tc/act/act.h"
65 #include "en/tc/post_meter.h"
66 #include "lib/devcom.h"
67 #include "lib/geneve.h"
68 #include "lib/fs_chains.h"
69 #include "diag/en_tc_tracepoint.h"
70 #include <asm/div64.h>
71 #include "lag/lag.h"
72 #include "lag/mp.h"
73
74 #define MLX5E_TC_TABLE_NUM_GROUPS 4
75 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18)
76
77 struct mlx5e_tc_table {
78 /* Protects the dynamic assignment of the t parameter
79 * which is the nic tc root table.
80 */
81 struct mutex t_lock;
82 struct mlx5e_priv *priv;
83 struct mlx5_flow_table *t;
84 struct mlx5_flow_table *miss_t;
85 struct mlx5_fs_chains *chains;
86 struct mlx5e_post_act *post_act;
87
88 struct rhashtable ht;
89
90 struct mod_hdr_tbl mod_hdr;
91 struct mutex hairpin_tbl_lock; /* protects hairpin_tbl */
92 DECLARE_HASHTABLE(hairpin_tbl, 8);
93
94 struct notifier_block netdevice_nb;
95 struct netdev_net_notifier netdevice_nn;
96
97 struct mlx5_tc_ct_priv *ct;
98 struct mapping_ctx *mapping;
99 struct dentry *dfs_root;
100
101 /* tc action stats */
102 struct mlx5e_tc_act_stats_handle *action_stats_handle;
103 };
104
105 struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
106 [MAPPED_OBJ_TO_REG] = {
107 .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
108 .moffset = 0,
109 .mlen = 16,
110 },
111 [VPORT_TO_REG] = {
112 .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
113 .moffset = 16,
114 .mlen = 16,
115 },
116 [TUNNEL_TO_REG] = {
117 .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,
118 .moffset = 8,
119 .mlen = ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS,
120 .soffset = MLX5_BYTE_OFF(fte_match_param,
121 misc_parameters_2.metadata_reg_c_1),
122 },
123 [ZONE_TO_REG] = zone_to_reg_ct,
124 [ZONE_RESTORE_TO_REG] = zone_restore_to_reg_ct,
125 [CTSTATE_TO_REG] = ctstate_to_reg_ct,
126 [MARK_TO_REG] = mark_to_reg_ct,
127 [LABELS_TO_REG] = labels_to_reg_ct,
128 [FTEID_TO_REG] = fteid_to_reg_ct,
129 /* For NIC rules we store the restore metadata directly
130 * into reg_b that is passed to SW since we don't
131 * jump between steering domains.
132 */
133 [NIC_MAPPED_OBJ_TO_REG] = {
134 .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,
135 .moffset = 0,
136 .mlen = 16,
137 },
138 [NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct,
139 [PACKET_COLOR_TO_REG] = packet_color_to_reg,
140 };
141
142 struct mlx5e_tc_jump_state {
143 u32 jump_count;
144 bool jump_target;
145 struct mlx5_flow_attr *jumping_attr;
146
147 enum flow_action_id last_id;
148 u32 last_index;
149 };
150
151 struct mlx5e_tc_table *mlx5e_tc_table_alloc(void)
152 {
153 struct mlx5e_tc_table *tc;
154
155 tc = kvzalloc(sizeof(*tc), GFP_KERNEL);
156 return tc ? tc : ERR_PTR(-ENOMEM);
157 }
158
159 void mlx5e_tc_table_free(struct mlx5e_tc_table *tc)
160 {
161 kvfree(tc);
162 }
163
164 struct mlx5_fs_chains *mlx5e_nic_chains(struct mlx5e_tc_table *tc)
165 {
166 return tc->chains;
167 }
168
169 /* To avoid a false lock dependency warning, set the tc_ht lock class
170 * to be different from the lock class of the ht used when deleting the
171 * last flow from a group and then deleting the group: that path reaches
172 * del_sw_flow_group(), which calls rhashtable_destroy() on fg->ftes_hash
173 * and takes that ht's mutex, which is different from the tc_ht mutex here.
174 */
175 static struct lock_class_key tc_ht_lock_key;
176 static struct lock_class_key tc_ht_wq_key;
177
178 static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow);
179 static void free_flow_post_acts(struct mlx5e_tc_flow *flow);
180 static void mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow,
181 struct mlx5_flow_attr *attr);
182
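/* OR a <val, mask> pair for the given register mapping into an existing
 * flow spec: both are shifted to the mapping's bit offset within the
 * metadata register match field and misc_parameters_2 matching is enabled.
 */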
183 void
184 mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
185 enum mlx5e_tc_attr_to_reg type,
186 u32 val,
187 u32 mask)
188 {
189 void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval;
190 int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
191 int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
192 int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
193 u32 max_mask = GENMASK(match_len - 1, 0);
194 __be32 curr_mask_be, curr_val_be;
195 u32 curr_mask, curr_val;
196
197 fmask = headers_c + soffset;
198 fval = headers_v + soffset;
199
200 memcpy(&curr_mask_be, fmask, 4);
201 memcpy(&curr_val_be, fval, 4);
202
203 curr_mask = be32_to_cpu(curr_mask_be);
204 curr_val = be32_to_cpu(curr_val_be);
205
206 /* move to correct offset */
207 WARN_ON(mask > max_mask);
208 mask <<= moffset;
209 val <<= moffset;
210 max_mask <<= moffset;
211
212 /* zero val and mask */
213 curr_mask &= ~max_mask;
214 curr_val &= ~max_mask;
215
216 /* add current to mask */
217 curr_mask |= mask;
218 curr_val |= val;
219
220 /* back to be32 and write */
221 curr_mask_be = cpu_to_be32(curr_mask);
222 curr_val_be = cpu_to_be32(curr_val);
223
224 memcpy(fmask, &curr_mask_be, 4);
225 memcpy(fval, &curr_val_be, 4);
226
227 spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
228 }
229
230 void
231 mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec,
232 enum mlx5e_tc_attr_to_reg type,
233 u32 *val,
234 u32 *mask)
235 {
236 void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval;
237 int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
238 int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
239 int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
240 u32 max_mask = GENMASK(match_len - 1, 0);
241 __be32 curr_mask_be, curr_val_be;
242 u32 curr_mask, curr_val;
243
244 fmask = headers_c + soffset;
245 fval = headers_v + soffset;
246
247 memcpy(&curr_mask_be, fmask, 4);
248 memcpy(&curr_val_be, fval, 4);
249
250 curr_mask = be32_to_cpu(curr_mask_be);
251 curr_val = be32_to_cpu(curr_val_be);
252
253 *mask = (curr_mask >> moffset) & max_mask;
254 *val = (curr_val >> moffset) & max_mask;
255 }
256
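/* Append a modify-header SET action that writes @data into the register
 * described by @type. Returns the index of the new action within
 * @mod_hdr_acts so it can later be rewritten with
 * mlx5e_tc_match_to_reg_mod_hdr_change(), or a negative errno on failure.
 */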
257 int
258 mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev,
259 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
260 enum mlx5_flow_namespace_type ns,
261 enum mlx5e_tc_attr_to_reg type,
262 u32 data)
263 {
264 int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
265 int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
266 int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
267 char *modact;
268 int err;
269
270 modact = mlx5e_mod_hdr_alloc(mdev, ns, mod_hdr_acts);
271 if (IS_ERR(modact))
272 return PTR_ERR(modact);
273
274 /* Firmware has 5bit length field and 0 means 32bits */
275 if (mlen == 32)
276 mlen = 0;
277
278 MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
279 MLX5_SET(set_action_in, modact, field, mfield);
280 MLX5_SET(set_action_in, modact, offset, moffset);
281 MLX5_SET(set_action_in, modact, length, mlen);
282 MLX5_SET(set_action_in, modact, data, data);
283 err = mod_hdr_acts->num_actions;
284 mod_hdr_acts->num_actions++;
285
286 return err;
287 }
288
289 static struct mlx5e_tc_act_stats_handle *
290 get_act_stats_handle(struct mlx5e_priv *priv)
291 {
292 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
293 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
294 struct mlx5_rep_uplink_priv *uplink_priv;
295 struct mlx5e_rep_priv *uplink_rpriv;
296
297 if (is_mdev_switchdev_mode(priv->mdev)) {
298 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
299 uplink_priv = &uplink_rpriv->uplink_priv;
300
301 return uplink_priv->action_stats_handle;
302 }
303
304 return tc->action_stats_handle;
305 }
306
307 struct mlx5e_tc_int_port_priv *
308 mlx5e_get_int_port_priv(struct mlx5e_priv *priv)
309 {
310 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
311 struct mlx5_rep_uplink_priv *uplink_priv;
312 struct mlx5e_rep_priv *uplink_rpriv;
313
314 if (is_mdev_switchdev_mode(priv->mdev)) {
315 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
316 uplink_priv = &uplink_rpriv->uplink_priv;
317
318 return uplink_priv->int_port_priv;
319 }
320
321 return NULL;
322 }
323
324 struct mlx5e_flow_meters *
325 mlx5e_get_flow_meters(struct mlx5_core_dev *dev)
326 {
327 struct mlx5_eswitch *esw = dev->priv.eswitch;
328 struct mlx5_rep_uplink_priv *uplink_priv;
329 struct mlx5e_rep_priv *uplink_rpriv;
330 struct mlx5e_priv *priv;
331
332 if (is_mdev_switchdev_mode(dev)) {
333 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
334 uplink_priv = &uplink_rpriv->uplink_priv;
335 priv = netdev_priv(uplink_rpriv->netdev);
336 if (!uplink_priv->flow_meters)
337 uplink_priv->flow_meters =
338 mlx5e_flow_meters_init(priv,
339 MLX5_FLOW_NAMESPACE_FDB,
340 uplink_priv->post_act);
341 if (!IS_ERR(uplink_priv->flow_meters))
342 return uplink_priv->flow_meters;
343 }
344
345 return NULL;
346 }
347
348 static struct mlx5_tc_ct_priv *
349 get_ct_priv(struct mlx5e_priv *priv)
350 {
351 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
352 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
353 struct mlx5_rep_uplink_priv *uplink_priv;
354 struct mlx5e_rep_priv *uplink_rpriv;
355
356 if (is_mdev_switchdev_mode(priv->mdev)) {
357 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
358 uplink_priv = &uplink_rpriv->uplink_priv;
359
360 return uplink_priv->ct_priv;
361 }
362
363 return tc->ct;
364 }
365
366 static struct mlx5e_tc_psample *
367 get_sample_priv(struct mlx5e_priv *priv)
368 {
369 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
370 struct mlx5_rep_uplink_priv *uplink_priv;
371 struct mlx5e_rep_priv *uplink_rpriv;
372
373 if (is_mdev_switchdev_mode(priv->mdev)) {
374 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
375 uplink_priv = &uplink_rpriv->uplink_priv;
376
377 return uplink_priv->tc_psample;
378 }
379
380 return NULL;
381 }
382
383 static struct mlx5e_post_act *
384 get_post_action(struct mlx5e_priv *priv)
385 {
386 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
387 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
388 struct mlx5_rep_uplink_priv *uplink_priv;
389 struct mlx5e_rep_priv *uplink_rpriv;
390
391 if (is_mdev_switchdev_mode(priv->mdev)) {
392 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
393 uplink_priv = &uplink_rpriv->uplink_priv;
394
395 return uplink_priv->post_act;
396 }
397
398 return tc->post_act;
399 }
400
401 struct mlx5_flow_handle *
402 mlx5_tc_rule_insert(struct mlx5e_priv *priv,
403 struct mlx5_flow_spec *spec,
404 struct mlx5_flow_attr *attr)
405 {
406 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
407
408 if (is_mdev_switchdev_mode(priv->mdev))
409 return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
410
411 return mlx5e_add_offloaded_nic_rule(priv, spec, attr);
412 }
413
414 void
415 mlx5_tc_rule_delete(struct mlx5e_priv *priv,
416 struct mlx5_flow_handle *rule,
417 struct mlx5_flow_attr *attr)
418 {
419 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
420
421 if (is_mdev_switchdev_mode(priv->mdev)) {
422 mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
423 return;
424 }
425
426 mlx5e_del_offloaded_nic_rule(priv, rule, attr);
427 }
428
429 static bool
430 is_flow_meter_action(struct mlx5_flow_attr *attr)
431 {
432 return (((attr->action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) &&
433 (attr->exe_aso_type == MLX5_EXE_ASO_FLOW_METER)) ||
434 attr->flags & MLX5_ATTR_FLAG_MTU);
435 }
436
437 static int
438 mlx5e_tc_add_flow_meter(struct mlx5e_priv *priv,
439 struct mlx5_flow_attr *attr)
440 {
441 struct mlx5e_post_act *post_act = get_post_action(priv);
442 struct mlx5e_post_meter_priv *post_meter;
443 enum mlx5_flow_namespace_type ns_type;
444 struct mlx5e_flow_meter_handle *meter;
445 enum mlx5e_post_meter_type type;
446
447 if (IS_ERR(post_act))
448 return PTR_ERR(post_act);
449
450 meter = mlx5e_tc_meter_replace(priv->mdev, &attr->meter_attr.params);
451 if (IS_ERR(meter)) {
452 mlx5_core_err(priv->mdev, "Failed to get flow meter\n");
453 return PTR_ERR(meter);
454 }
455
456 ns_type = mlx5e_tc_meter_get_namespace(meter->flow_meters);
457 type = meter->params.mtu ? MLX5E_POST_METER_MTU : MLX5E_POST_METER_RATE;
458 post_meter = mlx5e_post_meter_init(priv, ns_type, post_act,
459 type,
460 meter->act_counter, meter->drop_counter,
461 attr->branch_true, attr->branch_false);
462 if (IS_ERR(post_meter)) {
463 mlx5_core_err(priv->mdev, "Failed to init post meter\n");
464 goto err_meter_init;
465 }
466
467 attr->meter_attr.meter = meter;
468 attr->meter_attr.post_meter = post_meter;
469 attr->dest_ft = mlx5e_post_meter_get_ft(post_meter);
470 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
471
472 return 0;
473
474 err_meter_init:
475 mlx5e_tc_meter_put(meter);
476 return PTR_ERR(post_meter);
477 }
478
479 static void
480 mlx5e_tc_del_flow_meter(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr)
481 {
482 mlx5e_post_meter_cleanup(esw, attr->meter_attr.post_meter);
483 mlx5e_tc_meter_put(attr->meter_attr.meter);
484 }
485
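/* Offload a rule to the right steering domain: legacy (NIC) mode uses the
 * NIC TC tables; in switchdev mode, sample flows go through the psample
 * infrastructure and flow-meter actions first set up their post-meter
 * tables before the rule is added to the FDB.
 */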
486 struct mlx5_flow_handle *
487 mlx5e_tc_rule_offload(struct mlx5e_priv *priv,
488 struct mlx5_flow_spec *spec,
489 struct mlx5_flow_attr *attr)
490 {
491 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
492 int err;
493
494 if (!is_mdev_switchdev_mode(priv->mdev))
495 return mlx5e_add_offloaded_nic_rule(priv, spec, attr);
496
497 if (attr->flags & MLX5_ATTR_FLAG_SAMPLE)
498 return mlx5e_tc_sample_offload(get_sample_priv(priv), spec, attr);
499
500 if (is_flow_meter_action(attr)) {
501 err = mlx5e_tc_add_flow_meter(priv, attr);
502 if (err)
503 return ERR_PTR(err);
504 }
505
506 return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
507 }
508
509 void
510 mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv,
511 struct mlx5_flow_handle *rule,
512 struct mlx5_flow_attr *attr)
513 {
514 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
515
516 if (!is_mdev_switchdev_mode(priv->mdev)) {
517 mlx5e_del_offloaded_nic_rule(priv, rule, attr);
518 return;
519 }
520
521 if (attr->flags & MLX5_ATTR_FLAG_SAMPLE) {
522 mlx5e_tc_sample_unoffload(get_sample_priv(priv), rule, attr);
523 return;
524 }
525
526 mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
527
528 if (attr->meter_attr.meter)
529 mlx5e_tc_del_flow_meter(esw, attr);
530 }
531
532 int
533 mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
534 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
535 enum mlx5_flow_namespace_type ns,
536 enum mlx5e_tc_attr_to_reg type,
537 u32 data)
538 {
539 int ret = mlx5e_tc_match_to_reg_set_and_get_id(mdev, mod_hdr_acts, ns, type, data);
540
541 return ret < 0 ? ret : 0;
542 }
543
544 void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev *mdev,
545 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
546 enum mlx5e_tc_attr_to_reg type,
547 int act_id, u32 data)
548 {
549 int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
550 int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
551 int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
552 char *modact;
553
554 modact = mlx5e_mod_hdr_get_item(mod_hdr_acts, act_id);
555
556 /* Firmware has 5bit length field and 0 means 32bits */
557 if (mlen == 32)
558 mlen = 0;
559
560 MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
561 MLX5_SET(set_action_in, modact, field, mfield);
562 MLX5_SET(set_action_in, modact, offset, moffset);
563 MLX5_SET(set_action_in, modact, length, mlen);
564 MLX5_SET(set_action_in, modact, data, data);
565 }
566
567 struct mlx5e_hairpin {
568 struct mlx5_hairpin *pair;
569
570 struct mlx5_core_dev *func_mdev;
571 struct mlx5e_priv *func_priv;
572 u32 tdn;
573 struct mlx5e_tir direct_tir;
574
575 int num_channels;
576 u8 log_num_packets;
577 struct mlx5e_rqt indir_rqt;
578 struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS];
579 struct mlx5_ttc_table *ttc;
580 };
581
582 struct mlx5e_hairpin_entry {
583 /* a node of a hash table which keeps all the hairpin entries */
584 struct hlist_node hairpin_hlist;
585
586 /* protects flows list */
587 spinlock_t flows_lock;
588 /* flows sharing the same hairpin */
589 struct list_head flows;
590 /* hpe's that were not fully initialized when dead peer update event
591 * function traversed them.
592 */
593 struct list_head dead_peer_wait_list;
594
595 u16 peer_vhca_id;
596 u8 prio;
597 struct mlx5e_hairpin *hp;
598 refcount_t refcnt;
599 struct completion res_ready;
600 };
601
602 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
603 struct mlx5e_tc_flow *flow);
604
605 struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
606 {
607 if (!flow || !refcount_inc_not_zero(&flow->refcnt))
608 return ERR_PTR(-EINVAL);
609 return flow;
610 }
611
612 void mlx5e_flow_put(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
613 {
614 if (refcount_dec_and_test(&flow->refcnt)) {
615 mlx5e_tc_del_flow(priv, flow);
616 kfree_rcu(flow, rcu_head);
617 }
618 }
619
620 bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
621 {
622 return flow_flag_test(flow, ESWITCH);
623 }
624
625 bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow)
626 {
627 return flow_flag_test(flow, FT);
628 }
629
630 bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
631 {
632 return flow_flag_test(flow, OFFLOADED);
633 }
634
635 int mlx5e_get_flow_namespace(struct mlx5e_tc_flow *flow)
636 {
637 return mlx5e_is_eswitch_flow(flow) ?
638 MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
639 }
640
641 static struct mlx5_core_dev *
642 get_flow_counter_dev(struct mlx5e_tc_flow *flow)
643 {
644 return mlx5e_is_eswitch_flow(flow) ? flow->attr->esw_attr->counter_dev : flow->priv->mdev;
645 }
646
647 static struct mod_hdr_tbl *
648 get_mod_hdr_table(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
649 {
650 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
651 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
652
653 return mlx5e_get_flow_namespace(flow) == MLX5_FLOW_NAMESPACE_FDB ?
654 &esw->offloads.mod_hdr :
655 &tc->mod_hdr;
656 }
657
658 int mlx5e_tc_attach_mod_hdr(struct mlx5e_priv *priv,
659 struct mlx5e_tc_flow *flow,
660 struct mlx5_flow_attr *attr)
661 {
662 struct mlx5e_mod_hdr_handle *mh;
663
664 mh = mlx5e_mod_hdr_attach(priv->mdev, get_mod_hdr_table(priv, flow),
665 mlx5e_get_flow_namespace(flow),
666 &attr->parse_attr->mod_hdr_acts);
667 if (IS_ERR(mh))
668 return PTR_ERR(mh);
669
670 WARN_ON(attr->modify_hdr);
671 attr->modify_hdr = mlx5e_mod_hdr_get(mh);
672 attr->mh = mh;
673
674 return 0;
675 }
676
677 void mlx5e_tc_detach_mod_hdr(struct mlx5e_priv *priv,
678 struct mlx5e_tc_flow *flow,
679 struct mlx5_flow_attr *attr)
680 {
681 /* flow wasn't fully initialized */
682 if (!attr->mh)
683 return;
684
685 mlx5e_mod_hdr_detach(priv->mdev, get_mod_hdr_table(priv, flow),
686 attr->mh);
687 attr->mh = NULL;
688 }
689
690 static
691 struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
692 {
693 struct mlx5_core_dev *mdev;
694 struct net_device *netdev;
695 struct mlx5e_priv *priv;
696
697 netdev = dev_get_by_index(net, ifindex);
698 if (!netdev)
699 return ERR_PTR(-ENODEV);
700
701 priv = netdev_priv(netdev);
702 mdev = priv->mdev;
703 dev_put(netdev);
704
705 /* Mirred tc action holds a refcount on the ifindex net_device (see
706 * net/sched/act_mirred.c:tcf_mirred_get_dev). So, it's okay to continue using mdev
707 * after dev_put(netdev), while we're in the context of adding a tc flow.
708 *
709 * The mdev pointer corresponds to the peer/out net_device of a hairpin. It is then
710 * stored in a hairpin object, which exists until all flows, that refer to it, get
711 * removed.
712 *
713 * On the other hand, after a hairpin object has been created, the peer net_device may
714 * be removed/unbound while there are still some hairpin flows that are using it. This
715 * case is handled by mlx5e_tc_hairpin_update_dead_peer, which is hooked to
716 * NETDEV_UNREGISTER event of the peer net_device.
717 */
718 return mdev;
719 }
720
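/* Set up the hairpin transport objects: a transport domain on the
 * function's mdev and a TIR pointing directly at the first hairpin RQ.
 */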
721 static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
722 {
723 struct mlx5e_tir_builder *builder;
724 int err;
725
726 builder = mlx5e_tir_builder_alloc(false);
727 if (!builder)
728 return -ENOMEM;
729
730 err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn);
731 if (err)
732 goto out;
733
734 mlx5e_tir_builder_build_inline(builder, hp->tdn, hp->pair->rqn[0]);
735 err = mlx5e_tir_init(&hp->direct_tir, builder, hp->func_mdev, false);
736 if (err)
737 goto create_tir_err;
738
739 out:
740 mlx5e_tir_builder_free(builder);
741 return err;
742
743 create_tir_err:
744 mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
745
746 goto out;
747 }
748
749 static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp)
750 {
751 mlx5e_tir_destroy(&hp->direct_tir);
752 mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
753 }
754
755 static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
756 {
757 struct mlx5e_priv *priv = hp->func_priv;
758 struct mlx5_core_dev *mdev = priv->mdev;
759 struct mlx5e_rss_params_indir *indir;
760 int err;
761
762 indir = kvmalloc(sizeof(*indir), GFP_KERNEL);
763 if (!indir)
764 return -ENOMEM;
765
766 mlx5e_rss_params_indir_init_uniform(indir, hp->num_channels);
767 err = mlx5e_rqt_init_indir(&hp->indir_rqt, mdev, hp->pair->rqn, hp->num_channels,
768 mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc,
769 indir);
770
771 kvfree(indir);
772 return err;
773 }
774
775 static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
776 {
777 struct mlx5e_priv *priv = hp->func_priv;
778 struct mlx5e_rss_params_hash rss_hash;
779 enum mlx5_traffic_types tt, max_tt;
780 struct mlx5e_tir_builder *builder;
781 int err = 0;
782
783 builder = mlx5e_tir_builder_alloc(false);
784 if (!builder)
785 return -ENOMEM;
786
787 rss_hash = mlx5e_rx_res_get_current_hash(priv->rx_res);
788
789 for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
790 struct mlx5e_rss_params_traffic_type rss_tt;
791
792 rss_tt = mlx5e_rss_get_default_tt_config(tt);
793
794 mlx5e_tir_builder_build_rqt(builder, hp->tdn,
795 mlx5e_rqt_get_rqtn(&hp->indir_rqt),
796 false);
797 mlx5e_tir_builder_build_rss(builder, &rss_hash, &rss_tt, false);
798
799 err = mlx5e_tir_init(&hp->indir_tir[tt], builder, hp->func_mdev, false);
800 if (err) {
801 mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err);
802 goto err_destroy_tirs;
803 }
804
805 mlx5e_tir_builder_clear(builder);
806 }
807
808 out:
809 mlx5e_tir_builder_free(builder);
810 return err;
811
812 err_destroy_tirs:
813 max_tt = tt;
814 for (tt = 0; tt < max_tt; tt++)
815 mlx5e_tir_destroy(&hp->indir_tir[tt]);
816
817 goto out;
818 }
819
820 static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp)
821 {
822 int tt;
823
824 for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
825 mlx5e_tir_destroy(&hp->indir_tir[tt]);
826 }
827
828 static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
829 struct ttc_params *ttc_params)
830 {
831 struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
832 int tt;
833
834 memset(ttc_params, 0, sizeof(*ttc_params));
835
836 ttc_params->ns = mlx5_get_flow_namespace(hp->func_mdev,
837 MLX5_FLOW_NAMESPACE_KERNEL);
838 for (tt = 0; tt < MLX5_NUM_TT; tt++) {
839 ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
840 ttc_params->dests[tt].tir_num =
841 tt == MLX5_TT_ANY ?
842 mlx5e_tir_get_tirn(&hp->direct_tir) :
843 mlx5e_tir_get_tirn(&hp->indir_tir[tt]);
844 }
845
846 ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
847 ft_attr->prio = MLX5E_TC_PRIO;
848 }
849
850 static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
851 {
852 struct mlx5e_priv *priv = hp->func_priv;
853 struct ttc_params ttc_params;
854 struct mlx5_ttc_table *ttc;
855 int err;
856
857 err = mlx5e_hairpin_create_indirect_rqt(hp);
858 if (err)
859 return err;
860
861 err = mlx5e_hairpin_create_indirect_tirs(hp);
862 if (err)
863 goto err_create_indirect_tirs;
864
865 mlx5e_hairpin_set_ttc_params(hp, &ttc_params);
866 hp->ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
867 if (IS_ERR(hp->ttc)) {
868 err = PTR_ERR(hp->ttc);
869 goto err_create_ttc_table;
870 }
871
872 ttc = mlx5e_fs_get_ttc(priv->fs, false);
873 netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
874 hp->num_channels,
875 mlx5_get_ttc_flow_table(ttc)->id);
876
877 return 0;
878
879 err_create_ttc_table:
880 mlx5e_hairpin_destroy_indirect_tirs(hp);
881 err_create_indirect_tirs:
882 mlx5e_rqt_destroy(&hp->indir_rqt);
883
884 return err;
885 }
886
887 static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp)
888 {
889 mlx5_destroy_ttc_table(hp->ttc);
890 mlx5e_hairpin_destroy_indirect_tirs(hp);
891 mlx5e_rqt_destroy(&hp->indir_rqt);
892 }
893
894 static struct mlx5e_hairpin *
895 mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params,
896 int peer_ifindex)
897 {
898 struct mlx5_core_dev *func_mdev, *peer_mdev;
899 struct mlx5e_hairpin *hp;
900 struct mlx5_hairpin *pair;
901 int err;
902
903 hp = kzalloc(sizeof(*hp), GFP_KERNEL);
904 if (!hp)
905 return ERR_PTR(-ENOMEM);
906
907 func_mdev = priv->mdev;
908 peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
909 if (IS_ERR(peer_mdev)) {
910 err = PTR_ERR(peer_mdev);
911 goto create_pair_err;
912 }
913
914 pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params);
915 if (IS_ERR(pair)) {
916 err = PTR_ERR(pair);
917 goto create_pair_err;
918 }
919 hp->pair = pair;
920 hp->func_mdev = func_mdev;
921 hp->func_priv = priv;
922 hp->num_channels = params->num_channels;
923 hp->log_num_packets = params->log_num_packets;
924
925 err = mlx5e_hairpin_create_transport(hp);
926 if (err)
927 goto create_transport_err;
928
929 if (hp->num_channels > 1) {
930 err = mlx5e_hairpin_rss_init(hp);
931 if (err)
932 goto rss_init_err;
933 }
934
935 return hp;
936
937 rss_init_err:
938 mlx5e_hairpin_destroy_transport(hp);
939 create_transport_err:
940 mlx5_core_hairpin_destroy(hp->pair);
941 create_pair_err:
942 kfree(hp);
943 return ERR_PTR(err);
944 }
945
946 static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp)
947 {
948 if (hp->num_channels > 1)
949 mlx5e_hairpin_rss_cleanup(hp);
950 mlx5e_hairpin_destroy_transport(hp);
951 mlx5_core_hairpin_destroy(hp->pair);
952 kvfree(hp);
953 }
954
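/* Hairpin entries are keyed by the peer's vhca id and the matched
 * VLAN PCP priority.
 */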
955 static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio)
956 {
957 return (peer_vhca_id << 16 | prio);
958 }
959
960 static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
961 u16 peer_vhca_id, u8 prio)
962 {
963 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
964 struct mlx5e_hairpin_entry *hpe;
965 u32 hash_key = hash_hairpin_info(peer_vhca_id, prio);
966
967 hash_for_each_possible(tc->hairpin_tbl, hpe,
968 hairpin_hlist, hash_key) {
969 if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) {
970 refcount_inc(&hpe->refcnt);
971 return hpe;
972 }
973 }
974
975 return NULL;
976 }
977
978 static void mlx5e_hairpin_put(struct mlx5e_priv *priv,
979 struct mlx5e_hairpin_entry *hpe)
980 {
981 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
982 /* no more hairpin flows for us, release the hairpin pair */
983 if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &tc->hairpin_tbl_lock))
984 return;
985 hash_del(&hpe->hairpin_hlist);
986 mutex_unlock(&tc->hairpin_tbl_lock);
987
988 if (!IS_ERR_OR_NULL(hpe->hp)) {
989 netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
990 dev_name(hpe->hp->pair->peer_mdev->device));
991
992 mlx5e_hairpin_destroy(hpe->hp);
993 }
994
995 WARN_ON(!list_empty(&hpe->flows));
996 kfree(hpe);
997 }
998
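/* Flows that don't match on a specific VLAN PCP share one hairpin entry,
 * keyed by this out-of-range priority value (valid PCPs are 0..7).
 */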
999 #define UNKNOWN_MATCH_PRIO 8
1000
1001 static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
1002 struct mlx5_flow_spec *spec, u8 *match_prio,
1003 struct netlink_ext_ack *extack)
1004 {
1005 void *headers_c, *headers_v;
1006 u8 prio_val, prio_mask = 0;
1007 bool vlan_present;
1008
1009 #ifdef CONFIG_MLX5_CORE_EN_DCB
1010 if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) {
1011 NL_SET_ERR_MSG_MOD(extack,
1012 "only PCP trust state supported for hairpin");
1013 return -EOPNOTSUPP;
1014 }
1015 #endif
1016 headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
1017 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
1018
1019 vlan_present = MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag);
1020 if (vlan_present) {
1021 prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
1022 prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
1023 }
1024
1025 if (!vlan_present || !prio_mask) {
1026 prio_val = UNKNOWN_MATCH_PRIO;
1027 } else if (prio_mask != 0x7) {
1028 NL_SET_ERR_MSG_MOD(extack,
1029 "masked priority match not supported for hairpin");
1030 return -EOPNOTSUPP;
1031 }
1032
1033 *match_prio = prio_val;
1034 return 0;
1035 }
1036
1037 static int debugfs_hairpin_num_active_get(void *data, u64 *val)
1038 {
1039 struct mlx5e_tc_table *tc = data;
1040 struct mlx5e_hairpin_entry *hpe;
1041 u32 cnt = 0;
1042 u32 bkt;
1043
1044 mutex_lock(&tc->hairpin_tbl_lock);
1045 hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist)
1046 cnt++;
1047 mutex_unlock(&tc->hairpin_tbl_lock);
1048
1049 *val = cnt;
1050
1051 return 0;
1052 }
1053 DEFINE_DEBUGFS_ATTRIBUTE(fops_hairpin_num_active,
1054 debugfs_hairpin_num_active_get, NULL, "%llu\n");
1055
1056 static int debugfs_hairpin_table_dump_show(struct seq_file *file, void *priv)
1057
1058 {
1059 struct mlx5e_tc_table *tc = file->private;
1060 struct mlx5e_hairpin_entry *hpe;
1061 u32 bkt;
1062
1063 mutex_lock(&tc->hairpin_tbl_lock);
1064 hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist)
1065 seq_printf(file,
1066 "Hairpin peer_vhca_id %u prio %u refcnt %u num_channels %u num_packets %lu\n",
1067 hpe->peer_vhca_id, hpe->prio,
1068 refcount_read(&hpe->refcnt), hpe->hp->num_channels,
1069 BIT(hpe->hp->log_num_packets));
1070 mutex_unlock(&tc->hairpin_tbl_lock);
1071
1072 return 0;
1073 }
1074 DEFINE_SHOW_ATTRIBUTE(debugfs_hairpin_table_dump);
1075
1076 static void mlx5e_tc_debugfs_init(struct mlx5e_tc_table *tc,
1077 struct dentry *dfs_root)
1078 {
1079 if (IS_ERR_OR_NULL(dfs_root))
1080 return;
1081
1082 tc->dfs_root = debugfs_create_dir("tc", dfs_root);
1083
1084 debugfs_create_file("hairpin_num_active", 0444, tc->dfs_root, tc,
1085 &fops_hairpin_num_active);
1086 debugfs_create_file("hairpin_table_dump", 0444, tc->dfs_root, tc,
1087 &debugfs_hairpin_table_dump_fops);
1088 }
1089
1090 static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
1091 struct mlx5e_tc_flow *flow,
1092 struct mlx5e_tc_flow_parse_attr *parse_attr,
1093 struct netlink_ext_ack *extack)
1094 {
1095 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
1096 struct devlink *devlink = priv_to_devlink(priv->mdev);
1097 int peer_ifindex = parse_attr->mirred_ifindex[0];
1098 union devlink_param_value val = {};
1099 struct mlx5_hairpin_params params;
1100 struct mlx5_core_dev *peer_mdev;
1101 struct mlx5e_hairpin_entry *hpe;
1102 struct mlx5e_hairpin *hp;
1103 u8 match_prio;
1104 u16 peer_id;
1105 int err;
1106
1107 peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
1108 if (IS_ERR(peer_mdev)) {
1109 NL_SET_ERR_MSG_MOD(extack, "invalid ifindex of mirred device");
1110 return PTR_ERR(peer_mdev);
1111 }
1112
1113 if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
1114 NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported");
1115 return -EOPNOTSUPP;
1116 }
1117
1118 peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
1119 err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio,
1120 extack);
1121 if (err)
1122 return err;
1123
1124 mutex_lock(&tc->hairpin_tbl_lock);
1125 hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
1126 if (hpe) {
1127 mutex_unlock(&tc->hairpin_tbl_lock);
1128 wait_for_completion(&hpe->res_ready);
1129
1130 if (IS_ERR(hpe->hp)) {
1131 err = -EREMOTEIO;
1132 goto out_err;
1133 }
1134 goto attach_flow;
1135 }
1136
1137 hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
1138 if (!hpe) {
1139 mutex_unlock(&tc->hairpin_tbl_lock);
1140 return -ENOMEM;
1141 }
1142
1143 spin_lock_init(&hpe->flows_lock);
1144 INIT_LIST_HEAD(&hpe->flows);
1145 INIT_LIST_HEAD(&hpe->dead_peer_wait_list);
1146 hpe->peer_vhca_id = peer_id;
1147 hpe->prio = match_prio;
1148 refcount_set(&hpe->refcnt, 1);
1149 init_completion(&hpe->res_ready);
1150
1151 hash_add(tc->hairpin_tbl, &hpe->hairpin_hlist,
1152 hash_hairpin_info(peer_id, match_prio));
1153 mutex_unlock(&tc->hairpin_tbl_lock);
1154
1155 err = devl_param_driverinit_value_get(
1156 devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE, &val);
1157 if (err) {
1158 err = -ENOMEM;
1159 goto out_err;
1160 }
1161
1162 params.log_num_packets = ilog2(val.vu32);
1163 params.log_data_size =
1164 clamp_t(u32,
1165 params.log_num_packets +
1166 MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev),
1167 MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz),
1168 MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));
1169
1170 params.q_counter = priv->q_counter;
1171 err = devl_param_driverinit_value_get(
1172 devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES, &val);
1173 if (err) {
1174 err = -ENOMEM;
1175 goto out_err;
1176 }
1177
1178 params.num_channels = val.vu32;
1179
1180 hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
1181 hpe->hp = hp;
1182 complete_all(&hpe->res_ready);
1183 if (IS_ERR(hp)) {
1184 err = PTR_ERR(hp);
1185 goto out_err;
1186 }
1187
1188 netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
1189 mlx5e_tir_get_tirn(&hp->direct_tir), hp->pair->rqn[0],
1190 dev_name(hp->pair->peer_mdev->device),
1191 hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);
1192
1193 attach_flow:
1194 if (hpe->hp->num_channels > 1) {
1195 flow_flag_set(flow, HAIRPIN_RSS);
1196 flow->attr->nic_attr->hairpin_ft =
1197 mlx5_get_ttc_flow_table(hpe->hp->ttc);
1198 } else {
1199 flow->attr->nic_attr->hairpin_tirn = mlx5e_tir_get_tirn(&hpe->hp->direct_tir);
1200 }
1201
1202 flow->hpe = hpe;
1203 spin_lock(&hpe->flows_lock);
1204 list_add(&flow->hairpin, &hpe->flows);
1205 spin_unlock(&hpe->flows_lock);
1206
1207 return 0;
1208
1209 out_err:
1210 mlx5e_hairpin_put(priv, hpe);
1211 return err;
1212 }
1213
1214 static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
1215 struct mlx5e_tc_flow *flow)
1216 {
1217 /* flow wasn't fully initialized */
1218 if (!flow->hpe)
1219 return;
1220
1221 spin_lock(&flow->hpe->flows_lock);
1222 list_del(&flow->hairpin);
1223 spin_unlock(&flow->hpe->flows_lock);
1224
1225 mlx5e_hairpin_put(priv, flow->hpe);
1226 flow->hpe = NULL;
1227 }
1228
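/* Build the destination list (explicit dest_ft, hairpin flow table or TIR,
 * destination chain or the VLAN table) plus an optional counter, create the
 * chain 0 root table on first use, and insert the rule into the chain/prio
 * table selected by the attributes.
 */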
1229 struct mlx5_flow_handle *
1230 mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
1231 struct mlx5_flow_spec *spec,
1232 struct mlx5_flow_attr *attr)
1233 {
1234 struct mlx5_flow_context *flow_context = &spec->flow_context;
1235 struct mlx5e_vlan_table *vlan = mlx5e_fs_get_vlan(priv->fs);
1236 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
1237 struct mlx5_nic_flow_attr *nic_attr = attr->nic_attr;
1238 struct mlx5_flow_destination dest[2] = {};
1239 struct mlx5_fs_chains *nic_chains;
1240 struct mlx5_flow_act flow_act = {
1241 .action = attr->action,
1242 .flags = FLOW_ACT_NO_APPEND,
1243 };
1244 struct mlx5_flow_handle *rule;
1245 struct mlx5_flow_table *ft;
1246 int dest_ix = 0;
1247
1248 nic_chains = mlx5e_nic_chains(tc);
1249 flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
1250 flow_context->flow_tag = nic_attr->flow_tag;
1251
1252 if (attr->dest_ft) {
1253 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1254 dest[dest_ix].ft = attr->dest_ft;
1255 dest_ix++;
1256 } else if (nic_attr->hairpin_ft) {
1257 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1258 dest[dest_ix].ft = nic_attr->hairpin_ft;
1259 dest_ix++;
1260 } else if (nic_attr->hairpin_tirn) {
1261 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1262 dest[dest_ix].tir_num = nic_attr->hairpin_tirn;
1263 dest_ix++;
1264 } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
1265 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1266 if (attr->dest_chain) {
1267 dest[dest_ix].ft = mlx5_chains_get_table(nic_chains,
1268 attr->dest_chain, 1,
1269 MLX5E_TC_FT_LEVEL);
1270 if (IS_ERR(dest[dest_ix].ft))
1271 return ERR_CAST(dest[dest_ix].ft);
1272 } else {
1273 dest[dest_ix].ft = mlx5e_vlan_get_flowtable(vlan);
1274 }
1275 dest_ix++;
1276 }
1277
1278 if (dest[0].type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
1279 MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
1280 flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
1281
1282 if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1283 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1284 dest[dest_ix].counter_id = mlx5_fc_id(attr->counter);
1285 dest_ix++;
1286 }
1287
1288 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
1289 flow_act.modify_hdr = attr->modify_hdr;
1290
1291 mutex_lock(&tc->t_lock);
1292 if (IS_ERR_OR_NULL(tc->t)) {
1293 /* Create the root table here if doesn't exist yet */
1294 tc->t =
1295 mlx5_chains_get_table(nic_chains, 0, 1, MLX5E_TC_FT_LEVEL);
1296
1297 if (IS_ERR(tc->t)) {
1298 mutex_unlock(&tc->t_lock);
1299 netdev_err(priv->netdev,
1300 "Failed to create tc offload table\n");
1301 rule = ERR_CAST(tc->t);
1302 goto err_ft_get;
1303 }
1304 }
1305 mutex_unlock(&tc->t_lock);
1306
1307 if (attr->chain || attr->prio)
1308 ft = mlx5_chains_get_table(nic_chains,
1309 attr->chain, attr->prio,
1310 MLX5E_TC_FT_LEVEL);
1311 else
1312 ft = attr->ft;
1313
1314 if (IS_ERR(ft)) {
1315 rule = ERR_CAST(ft);
1316 goto err_ft_get;
1317 }
1318
1319 if (attr->outer_match_level != MLX5_MATCH_NONE)
1320 spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
1321
1322 rule = mlx5_add_flow_rules(ft, spec,
1323 &flow_act, dest, dest_ix);
1324 if (IS_ERR(rule))
1325 goto err_rule;
1326
1327 return rule;
1328
1329 err_rule:
1330 if (attr->chain || attr->prio)
1331 mlx5_chains_put_table(nic_chains,
1332 attr->chain, attr->prio,
1333 MLX5E_TC_FT_LEVEL);
1334 err_ft_get:
1335 if (attr->dest_chain)
1336 mlx5_chains_put_table(nic_chains,
1337 attr->dest_chain, 1,
1338 MLX5E_TC_FT_LEVEL);
1339
1340 return ERR_CAST(rule);
1341 }
1342
1343 static int
1344 alloc_flow_attr_counter(struct mlx5_core_dev *counter_dev,
1345 struct mlx5_flow_attr *attr)
1346
1347 {
1348 struct mlx5_fc *counter;
1349
1350 counter = mlx5_fc_create(counter_dev, true);
1351 if (IS_ERR(counter))
1352 return PTR_ERR(counter);
1353
1354 attr->counter = counter;
1355 return 0;
1356 }
1357
1358 static int
1359 mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
1360 struct mlx5e_tc_flow *flow,
1361 struct netlink_ext_ack *extack)
1362 {
1363 struct mlx5e_tc_flow_parse_attr *parse_attr;
1364 struct mlx5_flow_attr *attr = flow->attr;
1365 struct mlx5_core_dev *dev = priv->mdev;
1366 int err;
1367
1368 parse_attr = attr->parse_attr;
1369
1370 if (flow_flag_test(flow, HAIRPIN)) {
1371 err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
1372 if (err)
1373 return err;
1374 }
1375
1376 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1377 err = alloc_flow_attr_counter(dev, attr);
1378 if (err)
1379 return err;
1380 }
1381
1382 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1383 err = mlx5e_tc_attach_mod_hdr(priv, flow, attr);
1384 if (err)
1385 return err;
1386 }
1387
1388 flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec, attr);
1389 return PTR_ERR_OR_ZERO(flow->rule[0]);
1390 }
1391
1392 void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv,
1393 struct mlx5_flow_handle *rule,
1394 struct mlx5_flow_attr *attr)
1395 {
1396 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
1397 struct mlx5_fs_chains *nic_chains;
1398
1399 nic_chains = mlx5e_nic_chains(tc);
1400 mlx5_del_flow_rules(rule);
1401
1402 if (attr->chain || attr->prio)
1403 mlx5_chains_put_table(nic_chains, attr->chain, attr->prio,
1404 MLX5E_TC_FT_LEVEL);
1405
1406 if (attr->dest_chain)
1407 mlx5_chains_put_table(nic_chains, attr->dest_chain, 1,
1408 MLX5E_TC_FT_LEVEL);
1409 }
1410
1411 static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
1412 struct mlx5e_tc_flow *flow)
1413 {
1414 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
1415 struct mlx5_flow_attr *attr = flow->attr;
1416
1417 flow_flag_clear(flow, OFFLOADED);
1418
1419 if (!IS_ERR_OR_NULL(flow->rule[0]))
1420 mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr);
1421
1422 /* Remove root table if no rules are left to avoid
1423 * extra steering hops.
1424 */
1425 mutex_lock(&tc->t_lock);
1426 if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) &&
1427 !IS_ERR_OR_NULL(tc->t)) {
1428 mlx5_chains_put_table(mlx5e_nic_chains(tc), 0, 1, MLX5E_TC_FT_LEVEL);
1429 tc->t = NULL;
1430 }
1431 mutex_unlock(&tc->t_lock);
1432
1433 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1434 mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
1435 mlx5e_tc_detach_mod_hdr(priv, flow, attr);
1436 }
1437
1438 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
1439 mlx5_fc_destroy(priv->mdev, attr->counter);
1440
1441 if (flow_flag_test(flow, HAIRPIN))
1442 mlx5e_hairpin_flow_del(priv, flow);
1443
1444 free_flow_post_acts(flow);
1445 mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr);
1446
1447 kvfree(attr->parse_attr);
1448 kfree(flow->attr);
1449 }
1450
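/* Slow-path flows are added to the FDB as-is; other flows go through
 * mlx5e_tc_rule_offload(), and flows with a split action list also get a
 * second forwarding rule.
 */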
1451 struct mlx5_flow_handle *
1452 mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
1453 struct mlx5e_tc_flow *flow,
1454 struct mlx5_flow_spec *spec,
1455 struct mlx5_flow_attr *attr)
1456 {
1457 struct mlx5_flow_handle *rule;
1458
1459 if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)
1460 return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
1461
1462 rule = mlx5e_tc_rule_offload(flow->priv, spec, attr);
1463
1464 if (IS_ERR(rule))
1465 return rule;
1466
1467 if (attr->esw_attr->split_count) {
1468 flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
1469 if (IS_ERR(flow->rule[1]))
1470 goto err_rule1;
1471 }
1472
1473 return rule;
1474
1475 err_rule1:
1476 mlx5e_tc_rule_unoffload(flow->priv, rule, attr);
1477 return flow->rule[1];
1478 }
1479
1480 void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
1481 struct mlx5e_tc_flow *flow,
1482 struct mlx5_flow_attr *attr)
1483 {
1484 flow_flag_clear(flow, OFFLOADED);
1485
1486 if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)
1487 return mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
1488
1489 if (attr->esw_attr->split_count)
1490 mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
1491
1492 mlx5e_tc_rule_unoffload(flow->priv, flow->rule[0], attr);
1493 }
1494
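/* Offload a flow to the slow path: forward all of its traffic to the slow
 * FDB table and, if the device can forward after a header rewrite, store
 * the chain mapping in a metadata register so the original chain can be
 * restored for packets taking this path.
 */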
1495 struct mlx5_flow_handle *
1496 mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
1497 struct mlx5e_tc_flow *flow,
1498 struct mlx5_flow_spec *spec)
1499 {
1500 struct mlx5e_tc_mod_hdr_acts mod_acts = {};
1501 struct mlx5e_mod_hdr_handle *mh = NULL;
1502 struct mlx5_flow_attr *slow_attr;
1503 struct mlx5_flow_handle *rule;
1504 bool fwd_and_modify_cap;
1505 u32 chain_mapping = 0;
1506 int err;
1507
1508 slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
1509 if (!slow_attr)
1510 return ERR_PTR(-ENOMEM);
1511
1512 memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
1513 slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1514 slow_attr->esw_attr->split_count = 0;
1515 slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;
1516
1517 fwd_and_modify_cap = MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table);
1518 if (!fwd_and_modify_cap)
1519 goto skip_restore;
1520
1521 err = mlx5_chains_get_chain_mapping(esw_chains(esw), flow->attr->chain, &chain_mapping);
1522 if (err)
1523 goto err_get_chain;
1524
1525 err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
1526 MAPPED_OBJ_TO_REG, chain_mapping);
1527 if (err)
1528 goto err_reg_set;
1529
1530 mh = mlx5e_mod_hdr_attach(esw->dev, get_mod_hdr_table(flow->priv, flow),
1531 MLX5_FLOW_NAMESPACE_FDB, &mod_acts);
1532 if (IS_ERR(mh)) {
1533 err = PTR_ERR(mh);
1534 goto err_attach;
1535 }
1536
1537 slow_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1538 slow_attr->modify_hdr = mlx5e_mod_hdr_get(mh);
1539
1540 skip_restore:
1541 rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
1542 if (IS_ERR(rule)) {
1543 err = PTR_ERR(rule);
1544 goto err_offload;
1545 }
1546
1547 flow->attr->slow_mh = mh;
1548 flow->chain_mapping = chain_mapping;
1549 flow_flag_set(flow, SLOW);
1550
1551 mlx5e_mod_hdr_dealloc(&mod_acts);
1552 kfree(slow_attr);
1553
1554 return rule;
1555
1556 err_offload:
1557 if (fwd_and_modify_cap)
1558 mlx5e_mod_hdr_detach(esw->dev, get_mod_hdr_table(flow->priv, flow), mh);
1559 err_attach:
1560 err_reg_set:
1561 if (fwd_and_modify_cap)
1562 mlx5_chains_put_chain_mapping(esw_chains(esw), chain_mapping);
1563 err_get_chain:
1564 mlx5e_mod_hdr_dealloc(&mod_acts);
1565 kfree(slow_attr);
1566 return ERR_PTR(err);
1567 }
1568
1569 void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
1570 struct mlx5e_tc_flow *flow)
1571 {
1572 struct mlx5e_mod_hdr_handle *slow_mh = flow->attr->slow_mh;
1573 struct mlx5_flow_attr *slow_attr;
1574
1575 slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
1576 if (!slow_attr) {
1577 mlx5_core_warn(flow->priv->mdev, "Unable to alloc attr to unoffload slow path rule\n");
1578 return;
1579 }
1580
1581 memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
1582 slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1583 slow_attr->esw_attr->split_count = 0;
1584 slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;
1585 if (slow_mh) {
1586 slow_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1587 slow_attr->modify_hdr = mlx5e_mod_hdr_get(slow_mh);
1588 }
1589 mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
1590 if (slow_mh) {
1591 mlx5e_mod_hdr_detach(esw->dev, get_mod_hdr_table(flow->priv, flow), slow_mh);
1592 mlx5_chains_put_chain_mapping(esw_chains(esw), flow->chain_mapping);
1593 flow->chain_mapping = 0;
1594 flow->attr->slow_mh = NULL;
1595 }
1596 flow_flag_clear(flow, SLOW);
1597 kfree(slow_attr);
1598 }
1599
1600 /* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
1601 * function.
1602 */
1603 static void unready_flow_add(struct mlx5e_tc_flow *flow,
1604 struct list_head *unready_flows)
1605 {
1606 flow_flag_set(flow, NOT_READY);
1607 list_add_tail(&flow->unready, unready_flows);
1608 }
1609
1610 /* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
1611 * function.
1612 */
1613 static void unready_flow_del(struct mlx5e_tc_flow *flow)
1614 {
1615 list_del(&flow->unready);
1616 flow_flag_clear(flow, NOT_READY);
1617 }
1618
1619 static void add_unready_flow(struct mlx5e_tc_flow *flow)
1620 {
1621 struct mlx5_rep_uplink_priv *uplink_priv;
1622 struct mlx5e_rep_priv *rpriv;
1623 struct mlx5_eswitch *esw;
1624
1625 esw = flow->priv->mdev->priv.eswitch;
1626 rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1627 uplink_priv = &rpriv->uplink_priv;
1628
1629 mutex_lock(&uplink_priv->unready_flows_lock);
1630 unready_flow_add(flow, &uplink_priv->unready_flows);
1631 mutex_unlock(&uplink_priv->unready_flows_lock);
1632 }
1633
1634 static void remove_unready_flow(struct mlx5e_tc_flow *flow)
1635 {
1636 struct mlx5_rep_uplink_priv *uplink_priv;
1637 struct mlx5e_rep_priv *rpriv;
1638 struct mlx5_eswitch *esw;
1639
1640 esw = flow->priv->mdev->priv.eswitch;
1641 rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1642 uplink_priv = &rpriv->uplink_priv;
1643
1644 mutex_lock(&uplink_priv->unready_flows_lock);
1645 if (flow_flag_test(flow, NOT_READY))
1646 unready_flow_del(flow);
1647 mutex_unlock(&uplink_priv->unready_flows_lock);
1648 }
1649
1650 bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev)
1651 {
1652 struct mlx5_core_dev *out_mdev, *route_mdev;
1653 struct mlx5e_priv *out_priv, *route_priv;
1654
1655 out_priv = netdev_priv(out_dev);
1656 out_mdev = out_priv->mdev;
1657 route_priv = netdev_priv(route_dev);
1658 route_mdev = route_priv->mdev;
1659
1660 if (out_mdev->coredev_type != MLX5_COREDEV_PF)
1661 return false;
1662
1663 if (route_mdev->coredev_type != MLX5_COREDEV_VF &&
1664 route_mdev->coredev_type != MLX5_COREDEV_SF)
1665 return false;
1666
1667 return mlx5e_same_hw_devs(out_priv, route_priv);
1668 }
1669
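/* Resolve the vport of @route_dev on the eswitch of @out_dev: try the
 * local eswitch first and, if LAG is active, fall back to scanning the
 * devcom peer eswitches.
 */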
1670 int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport)
1671 {
1672 struct mlx5e_priv *out_priv, *route_priv;
1673 struct mlx5_core_dev *route_mdev;
1674 struct mlx5_devcom_comp_dev *pos;
1675 struct mlx5_eswitch *esw;
1676 u16 vhca_id;
1677 int err;
1678
1679 out_priv = netdev_priv(out_dev);
1680 esw = out_priv->mdev->priv.eswitch;
1681 route_priv = netdev_priv(route_dev);
1682 route_mdev = route_priv->mdev;
1683
1684 vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id);
1685 err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
1686 if (!err)
1687 return err;
1688
1689 if (!mlx5_lag_is_active(out_priv->mdev))
1690 return err;
1691
1692 rcu_read_lock();
1693 err = -ENODEV;
1694 mlx5_devcom_for_each_peer_entry_rcu(esw->devcom, esw, pos) {
1695 err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
1696 if (!err)
1697 break;
1698 }
1699 rcu_read_unlock();
1700
1701 return err;
1702 }
1703
1704 static int
1705 verify_attr_actions(u32 actions, struct netlink_ext_ack *extack)
1706 {
1707 if (!(actions &
1708 (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
1709 NL_SET_ERR_MSG_MOD(extack, "Rule must have at least one forward/drop action");
1710 return -EOPNOTSUPP;
1711 }
1712
1713 if (!(~actions &
1714 (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
1715 NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action");
1716 return -EOPNOTSUPP;
1717 }
1718
1719 if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
1720 actions & MLX5_FLOW_CONTEXT_ACTION_DROP) {
1721 NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported");
1722 return -EOPNOTSUPP;
1723 }
1724
1725 return 0;
1726 }
1727
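/* Returns true if any forward destination of this eswitch flow requests an
 * encap (tunnel header) rewrite.
 */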
1728 static bool
1729 has_encap_dests(struct mlx5_flow_attr *attr)
1730 {
1731 struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
1732 int out_index;
1733
1734 for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
1735 if (esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)
1736 return true;
1737
1738 return false;
1739 }
1740
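/* Finalize a parsed flow attribute before offload: validate the action
 * combination, resolve encap destinations, attach modify-header contexts for
 * the main and any branch attributes, and allocate a flow counter when
 * counting was requested.
 */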
1741 static int
1742 post_process_attr(struct mlx5e_tc_flow *flow,
1743 struct mlx5_flow_attr *attr,
1744 struct netlink_ext_ack *extack)
1745 {
1746 bool vf_tun;
1747 int err = 0;
1748
1749 err = verify_attr_actions(attr->action, extack);
1750 if (err)
1751 goto err_out;
1752
1753 if (mlx5e_is_eswitch_flow(flow) && has_encap_dests(attr)) {
1754 err = mlx5e_tc_tun_encap_dests_set(flow->priv, flow, attr, extack, &vf_tun);
1755 if (err)
1756 goto err_out;
1757 }
1758
1759 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1760 err = mlx5e_tc_attach_mod_hdr(flow->priv, flow, attr);
1761 if (err)
1762 goto err_out;
1763 }
1764
1765 if (attr->branch_true &&
1766 attr->branch_true->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1767 err = mlx5e_tc_attach_mod_hdr(flow->priv, flow, attr->branch_true);
1768 if (err)
1769 goto err_out;
1770 }
1771
1772 if (attr->branch_false &&
1773 attr->branch_false->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1774 err = mlx5e_tc_attach_mod_hdr(flow->priv, flow, attr->branch_false);
1775 if (err)
1776 goto err_out;
1777 }
1778
1779 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1780 err = alloc_flow_attr_counter(get_flow_counter_dev(flow), attr);
1781 if (err)
1782 goto err_out;
1783 }
1784
1785 err_out:
1786 return err;
1787 }
1788
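/* Offload a flow to the FDB (eswitch) tables: validate chain/prio ranges,
 * set up decap routes and internal (OVS) port handling, post-process the
 * attribute, then install the rule either in the slow path (no valid encap
 * neighbour yet) or in the regular FDB path.
 */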
1789 static int
1790 mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
1791 struct mlx5e_tc_flow *flow,
1792 struct netlink_ext_ack *extack)
1793 {
1794 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1795 struct mlx5e_tc_flow_parse_attr *parse_attr;
1796 struct mlx5_flow_attr *attr = flow->attr;
1797 struct mlx5_esw_flow_attr *esw_attr;
1798 u32 max_prio, max_chain;
1799 int err = 0;
1800
1801 parse_attr = attr->parse_attr;
1802 esw_attr = attr->esw_attr;
1803
1804 /* We check chain range only for tc flows.
1805 * For ft flows, we checked attr->chain was originally 0 and set it to
1806 * FDB_FT_CHAIN which is outside tc range.
1807 * See mlx5e_rep_setup_ft_cb().
1808 */
1809 max_chain = mlx5_chains_get_chain_range(esw_chains(esw));
1810 if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) {
1811 NL_SET_ERR_MSG_MOD(extack,
1812 "Requested chain is out of supported range");
1813 err = -EOPNOTSUPP;
1814 goto err_out;
1815 }
1816
1817 max_prio = mlx5_chains_get_prio_range(esw_chains(esw));
1818 if (attr->prio > max_prio) {
1819 NL_SET_ERR_MSG_MOD(extack,
1820 "Requested priority is out of supported range");
1821 err = -EOPNOTSUPP;
1822 goto err_out;
1823 }
1824
1825 if (flow_flag_test(flow, TUN_RX)) {
1826 err = mlx5e_attach_decap_route(priv, flow);
1827 if (err)
1828 goto err_out;
1829
1830 if (!attr->chain && esw_attr->int_port &&
1831 attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
1832 /* If decap route device is internal port, change the
1833 * source vport value in reg_c0 back to uplink just in
1834 * case the rule performs goto chain > 0. If we have a miss
1835 * on chain > 0 we want the metadata regs to hold the
1836 * chain id so SW will resume handling of this packet
1837 * from the proper chain.
1838 */
1839 u32 metadata = mlx5_eswitch_get_vport_metadata_for_set(esw,
1840 esw_attr->in_rep->vport);
1841
1842 err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts,
1843 MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG,
1844 metadata);
1845 if (err)
1846 goto err_out;
1847
1848 attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1849 }
1850 }
1851
1852 if (flow_flag_test(flow, L3_TO_L2_DECAP)) {
1853 err = mlx5e_attach_decap(priv, flow, extack);
1854 if (err)
1855 goto err_out;
1856 }
1857
1858 if (netif_is_ovs_master(parse_attr->filter_dev)) {
1859 struct mlx5e_tc_int_port *int_port;
1860
1861 if (attr->chain) {
1862 NL_SET_ERR_MSG_MOD(extack,
1863 "Internal port rule is only supported on chain 0");
1864 err = -EOPNOTSUPP;
1865 goto err_out;
1866 }
1867
1868 if (attr->dest_chain) {
1869 NL_SET_ERR_MSG_MOD(extack,
1870 "Internal port rule offload doesn't support goto action");
1871 err = -EOPNOTSUPP;
1872 goto err_out;
1873 }
1874
1875 int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv),
1876 parse_attr->filter_dev->ifindex,
1877 flow_flag_test(flow, EGRESS) ?
1878 MLX5E_TC_INT_PORT_EGRESS :
1879 MLX5E_TC_INT_PORT_INGRESS);
1880 if (IS_ERR(int_port)) {
1881 err = PTR_ERR(int_port);
1882 goto err_out;
1883 }
1884
1885 esw_attr->int_port = int_port;
1886 }
1887
1888 err = post_process_attr(flow, attr, extack);
1889 if (err)
1890 goto err_out;
1891
1892 err = mlx5e_tc_act_stats_add_flow(get_act_stats_handle(priv), flow);
1893 if (err)
1894 goto err_out;
1895
1896 /* we get here if one of the following takes place:
1897 * (1) there's no error
1898 * (2) there's an encap action and we don't have valid neigh
1899 */
1900 if (flow_flag_test(flow, SLOW))
1901 flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec);
1902 else
1903 flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);
1904
1905 if (IS_ERR(flow->rule[0])) {
1906 err = PTR_ERR(flow->rule[0]);
1907 goto err_out;
1908 }
1909 flow_flag_set(flow, OFFLOADED);
1910
1911 return 0;
1912
1913 err_out:
1914 flow_flag_set(flow, FAILED);
1915 return err;
1916 }
1917
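/* True if the flow's match value carries non-zero GENEVE TLV option data;
 * used on delete to decide whether to release the GENEVE TLV option object.
 */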
1918 static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
1919 {
1920 struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec;
1921 void *headers_v = MLX5_ADDR_OF(fte_match_param,
1922 spec->match_value,
1923 misc_parameters_3);
1924 u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3,
1925 headers_v,
1926 geneve_tlv_option_0_data);
1927
1928 return !!geneve_tlv_opt_0_data;
1929 }
1930
1931 static void free_branch_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr)
1932 {
1933 if (!attr)
1934 return;
1935
1936 mlx5_free_flow_attr_actions(flow, attr);
1937 kvfree(attr->parse_attr);
1938 kfree(attr);
1939 }
1940
1941 static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
1942 struct mlx5e_tc_flow *flow)
1943 {
1944 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1945 struct mlx5_flow_attr *attr = flow->attr;
1946
1947 mlx5e_put_flow_tunnel_id(flow);
1948
1949 remove_unready_flow(flow);
1950
1951 if (mlx5e_is_offloaded_flow(flow)) {
1952 if (flow_flag_test(flow, SLOW))
1953 mlx5e_tc_unoffload_from_slow_path(esw, flow);
1954 else
1955 mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
1956 }
1957 complete_all(&flow->del_hw_done);
1958
1959 if (mlx5_flow_has_geneve_opt(flow))
1960 mlx5_geneve_tlv_option_del(priv->mdev->geneve);
1961
1962 if (flow->decap_route)
1963 mlx5e_detach_decap_route(priv, flow);
1964
1965 mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);
1966
1967 if (flow_flag_test(flow, L3_TO_L2_DECAP))
1968 mlx5e_detach_decap(priv, flow);
1969
1970 mlx5e_tc_act_stats_del_flow(get_act_stats_handle(priv), flow);
1971
1972 free_flow_post_acts(flow);
1973 mlx5_free_flow_attr_actions(flow, attr);
1974
1975 kvfree(attr->esw_attr->rx_tun_attr);
1976 kvfree(attr->parse_attr);
1977 kfree(flow->attr);
1978 }
1979
1980 struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
1981 {
1982 struct mlx5_flow_attr *attr;
1983
1984 attr = list_first_entry(&flow->attrs, struct mlx5_flow_attr, list);
1985 return attr->counter;
1986 }
1987
1988 /* Iterate over tmp_list of flows attached to flow_list head. */
1989 void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
1990 {
1991 struct mlx5e_tc_flow *flow, *tmp;
1992
1993 list_for_each_entry_safe(flow, tmp, flow_list, tmp_list)
1994 mlx5e_flow_put(priv, flow);
1995 }
1996
1997 static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow,
1998 int peer_index)
1999 {
2000 struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
2001 struct mlx5e_tc_flow *peer_flow;
2002 struct mlx5e_tc_flow *tmp;
2003
2004 if (!flow_flag_test(flow, ESWITCH) ||
2005 !flow_flag_test(flow, DUP))
2006 return;
2007
2008 mutex_lock(&esw->offloads.peer_mutex);
2009 list_del(&flow->peer[peer_index]);
2010 mutex_unlock(&esw->offloads.peer_mutex);
2011
2012 list_for_each_entry_safe(peer_flow, tmp, &flow->peer_flows, peer_flows) {
2013 if (peer_index != mlx5_get_dev_index(peer_flow->priv->mdev))
2014 continue;
2015
2016 list_del(&peer_flow->peer_flows);
2017 if (refcount_dec_and_test(&peer_flow->refcnt)) {
2018 mlx5e_tc_del_fdb_flow(peer_flow->priv, peer_flow);
2019 kfree(peer_flow);
2020 }
2021 }
2022
2023 if (list_empty(&flow->peer_flows))
2024 flow_flag_clear(flow, DUP);
2025 }
2026
2027 static void mlx5e_tc_del_fdb_peers_flow(struct mlx5e_tc_flow *flow)
2028 {
2029 int i;
2030
2031 for (i = 0; i < MLX5_MAX_PORTS; i++) {
2032 if (i == mlx5_get_dev_index(flow->priv->mdev))
2033 continue;
2034 mlx5e_tc_del_fdb_peer_flow(flow, i);
2035 }
2036 }
2037
2038 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
2039 struct mlx5e_tc_flow *flow)
2040 {
2041 if (mlx5e_is_eswitch_flow(flow)) {
2042 struct mlx5_devcom_comp_dev *devcom = flow->priv->mdev->priv.eswitch->devcom;
2043
2044 if (!mlx5_devcom_for_each_peer_begin(devcom)) {
2045 mlx5e_tc_del_fdb_flow(priv, flow);
2046 return;
2047 }
2048
2049 mlx5e_tc_del_fdb_peers_flow(flow);
2050 mlx5_devcom_for_each_peer_end(devcom);
2051 mlx5e_tc_del_fdb_flow(priv, flow);
2052 } else {
2053 mlx5e_tc_del_nic_flow(priv, flow);
2054 }
2055 }
2056
2057 static bool flow_requires_tunnel_mapping(u32 chain, struct flow_cls_offload *f)
2058 {
2059 struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2060 struct flow_action *flow_action = &rule->action;
2061 const struct flow_action_entry *act;
2062 int i;
2063
2064 if (chain)
2065 return false;
2066
2067 flow_action_for_each(i, act, flow_action) {
2068 switch (act->id) {
2069 case FLOW_ACTION_GOTO:
2070 return true;
2071 case FLOW_ACTION_SAMPLE:
2072 return true;
2073 default:
2074 continue;
2075 }
2076 }
2077
2078 return false;
2079 }
2080
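/* On chain > 0 the tunnel options are restored from a mapping rather than
 * matched directly, so they must either be completely ignored (don't care)
 * or matched in full; partial masks on option class/type/data are rejected.
 */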
2081 static int
2082 enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv,
2083 struct flow_dissector_key_enc_opts *opts,
2084 struct netlink_ext_ack *extack,
2085 bool *dont_care)
2086 {
2087 struct geneve_opt *opt;
2088 int off = 0;
2089
2090 *dont_care = true;
2091
2092 while (opts->len > off) {
2093 opt = (struct geneve_opt *)&opts->data[off];
2094
2095 if (!(*dont_care) || opt->opt_class || opt->type ||
2096 memchr_inv(opt->opt_data, 0, opt->length * 4)) {
2097 *dont_care = false;
2098
2099 if (opt->opt_class != htons(U16_MAX) ||
2100 opt->type != U8_MAX) {
2101 NL_SET_ERR_MSG_MOD(extack,
2102 "Partial match of tunnel options in chain > 0 isn't supported");
2103 netdev_warn(priv->netdev,
2104 "Partial match of tunnel options in chain > 0 isn't supported");
2105 return -EOPNOTSUPP;
2106 }
2107 }
2108
2109 off += sizeof(struct geneve_opt) + opt->length * 4;
2110 }
2111
2112 return 0;
2113 }
2114
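/* Copy the dissector key identified by diss_key from the rule's match key
 * into dst, sized by the destination type.
 */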
2115 #define COPY_DISSECTOR(rule, diss_key, dst)\
2116 ({ \
2117 struct flow_rule *__rule = (rule);\
2118 typeof(dst) __dst = dst;\
2119 \
2120 memcpy(__dst,\
2121 skb_flow_dissector_target(__rule->match.dissector,\
2122 diss_key,\
2123 __rule->match.key),\
2124 sizeof(*__dst));\
2125 })
2126
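/* Allocate mapping ids for the flow's tunnel match key (and, if present, its
 * tunnel options) and encode them into a single tunnel id. On chain 0 the id
 * is written to the tunnel register via a modify-header action; on higher
 * chains the flow matches on the id that was set by the chain 0 rule.
 */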
2127 static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv,
2128 struct mlx5e_tc_flow *flow,
2129 struct flow_cls_offload *f,
2130 struct net_device *filter_dev)
2131 {
2132 struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2133 struct netlink_ext_ack *extack = f->common.extack;
2134 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
2135 struct flow_match_enc_opts enc_opts_match;
2136 struct tunnel_match_enc_opts tun_enc_opts;
2137 struct mlx5_rep_uplink_priv *uplink_priv;
2138 struct mlx5_flow_attr *attr = flow->attr;
2139 struct mlx5e_rep_priv *uplink_rpriv;
2140 struct tunnel_match_key tunnel_key;
2141 bool enc_opts_is_dont_care = true;
2142 u32 tun_id, enc_opts_id = 0;
2143 struct mlx5_eswitch *esw;
2144 u32 value, mask;
2145 int err;
2146
2147 esw = priv->mdev->priv.eswitch;
2148 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
2149 uplink_priv = &uplink_rpriv->uplink_priv;
2150
2151 memset(&tunnel_key, 0, sizeof(tunnel_key));
2152 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL,
2153 &tunnel_key.enc_control);
2154 if (tunnel_key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS)
2155 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
2156 &tunnel_key.enc_ipv4);
2157 else
2158 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
2159 &tunnel_key.enc_ipv6);
2160 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IP, &tunnel_key.enc_ip);
2161 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_PORTS,
2162 &tunnel_key.enc_tp);
2163 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_KEYID,
2164 &tunnel_key.enc_key_id);
2165 tunnel_key.filter_ifindex = filter_dev->ifindex;
2166
2167 err = mapping_add(uplink_priv->tunnel_mapping, &tunnel_key, &tun_id);
2168 if (err)
2169 return err;
2170
2171 flow_rule_match_enc_opts(rule, &enc_opts_match);
2172 err = enc_opts_is_dont_care_or_full_match(priv,
2173 enc_opts_match.mask,
2174 extack,
2175 &enc_opts_is_dont_care);
2176 if (err)
2177 goto err_enc_opts;
2178
2179 if (!enc_opts_is_dont_care) {
2180 memset(&tun_enc_opts, 0, sizeof(tun_enc_opts));
2181 memcpy(&tun_enc_opts.key, enc_opts_match.key,
2182 sizeof(*enc_opts_match.key));
2183 memcpy(&tun_enc_opts.mask, enc_opts_match.mask,
2184 sizeof(*enc_opts_match.mask));
2185
2186 err = mapping_add(uplink_priv->tunnel_enc_opts_mapping,
2187 &tun_enc_opts, &enc_opts_id);
2188 if (err)
2189 goto err_enc_opts;
2190 }
2191
2192 value = tun_id << ENC_OPTS_BITS | enc_opts_id;
2193 mask = enc_opts_id ? TUNNEL_ID_MASK :
2194 (TUNNEL_ID_MASK & ~ENC_OPTS_BITS_MASK);
2195
2196 if (attr->chain) {
2197 mlx5e_tc_match_to_reg_match(&attr->parse_attr->spec,
2198 TUNNEL_TO_REG, value, mask);
2199 } else {
2200 mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
2201 err = mlx5e_tc_match_to_reg_set(priv->mdev,
2202 mod_hdr_acts, MLX5_FLOW_NAMESPACE_FDB,
2203 TUNNEL_TO_REG, value);
2204 if (err)
2205 goto err_set;
2206
2207 attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
2208 }
2209
2210 flow->attr->tunnel_id = value;
2211 return 0;
2212
2213 err_set:
2214 if (enc_opts_id)
2215 mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
2216 enc_opts_id);
2217 err_enc_opts:
2218 mapping_remove(uplink_priv->tunnel_mapping, tun_id);
2219 return err;
2220 }
2221
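/* Release the tunnel and tunnel-options mapping ids taken by
 * mlx5e_get_flow_tunnel_id().
 */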
2222 static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow)
2223 {
2224 u32 enc_opts_id = flow->attr->tunnel_id & ENC_OPTS_BITS_MASK;
2225 u32 tun_id = flow->attr->tunnel_id >> ENC_OPTS_BITS;
2226 struct mlx5_rep_uplink_priv *uplink_priv;
2227 struct mlx5e_rep_priv *uplink_rpriv;
2228 struct mlx5_eswitch *esw;
2229
2230 esw = flow->priv->mdev->priv.eswitch;
2231 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
2232 uplink_priv = &uplink_rpriv->uplink_priv;
2233
2234 if (tun_id)
2235 mapping_remove(uplink_priv->tunnel_mapping, tun_id);
2236 if (enc_opts_id)
2237 mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
2238 enc_opts_id);
2239 }
2240
2241 void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
2242 struct flow_match_basic *match, bool outer,
2243 void *headers_c, void *headers_v)
2244 {
2245 bool ip_version_cap;
2246
2247 ip_version_cap = outer ?
2248 MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2249 ft_field_support.outer_ip_version) :
2250 MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2251 ft_field_support.inner_ip_version);
2252
2253 if (ip_version_cap && match->mask->n_proto == htons(0xFFFF) &&
2254 (match->key->n_proto == htons(ETH_P_IP) ||
2255 match->key->n_proto == htons(ETH_P_IPV6))) {
2256 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version);
2257 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version,
2258 match->key->n_proto == htons(ETH_P_IP) ? 4 : 6);
2259 } else {
2260 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
2261 ntohs(match->mask->n_proto));
2262 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
2263 ntohs(match->key->n_proto));
2264 }
2265 }
2266
2267 u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer)
2268 {
2269 void *headers_v;
2270 u16 ethertype;
2271 u8 ip_version;
2272
2273 if (outer)
2274 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
2275 else
2276 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers);
2277
2278 ip_version = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_version);
2279 /* Return ip_version converted from ethertype anyway */
2280 if (!ip_version) {
2281 ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
2282 if (ethertype == ETH_P_IP || ethertype == ETH_P_ARP)
2283 ip_version = 4;
2284 else if (ethertype == ETH_P_IPV6)
2285 ip_version = 6;
2286 }
2287 return ip_version;
2288 }
2289
2290 /* A tunnel device follows RFC 6040 (see include/net/inet_ecn.h) and changes
2291  * the inner ip_ecn depending on the inner and outer ip_ecn as follows:
2292 * +---------+----------------------------------------+
2293 * |Arriving | Arriving Outer Header |
2294 * | Inner +---------+---------+---------+----------+
2295 * | Header | Not-ECT | ECT(0) | ECT(1) | CE |
2296 * +---------+---------+---------+---------+----------+
2297 * | Not-ECT | Not-ECT | Not-ECT | Not-ECT | <drop> |
2298 * | ECT(0) | ECT(0) | ECT(0) | ECT(1) | CE* |
2299 * | ECT(1) | ECT(1) | ECT(1) | ECT(1)* | CE* |
2300 * | CE | CE | CE | CE | CE |
2301 * +---------+---------+---------+---------+----------+
2302 *
2303 * Tc matches on inner after decapsulation on tunnel device, but hw offload matches
2304 * the inner ip_ecn value before hardware decap action.
2305 *
2306  * Cells marked with * are changed from the original inner packet ip_ecn value during decap,
2307  * so matching those values on inner ip_ecn before decap will fail.
2308 *
2309  * The following helper allows offload when the inner ip_ecn won't be changed by the outer
2310  * ip_ecn, except for outer ip_ecn = CE, where the inner ip_ecn is always changed to CE,
2311  * and as such the inner ip_ecn=CE match can be dropped.
2312 */
2313
2314 static int mlx5e_tc_verify_tunnel_ecn(struct mlx5e_priv *priv,
2315 struct flow_cls_offload *f,
2316 bool *match_inner_ecn)
2317 {
2318 u8 outer_ecn_mask = 0, outer_ecn_key = 0, inner_ecn_mask = 0, inner_ecn_key = 0;
2319 struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2320 struct netlink_ext_ack *extack = f->common.extack;
2321 struct flow_match_ip match;
2322
2323 *match_inner_ecn = true;
2324
2325 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
2326 flow_rule_match_enc_ip(rule, &match);
2327 outer_ecn_key = match.key->tos & INET_ECN_MASK;
2328 outer_ecn_mask = match.mask->tos & INET_ECN_MASK;
2329 }
2330
2331 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
2332 flow_rule_match_ip(rule, &match);
2333 inner_ecn_key = match.key->tos & INET_ECN_MASK;
2334 inner_ecn_mask = match.mask->tos & INET_ECN_MASK;
2335 }
2336
2337 if (outer_ecn_mask != 0 && outer_ecn_mask != INET_ECN_MASK) {
2338 NL_SET_ERR_MSG_MOD(extack, "Partial match on enc_tos ecn bits isn't supported");
2339 netdev_warn(priv->netdev, "Partial match on enc_tos ecn bits isn't supported");
2340 return -EOPNOTSUPP;
2341 }
2342
2343 if (!outer_ecn_mask) {
2344 if (!inner_ecn_mask)
2345 return 0;
2346
2347 NL_SET_ERR_MSG_MOD(extack,
2348 "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
2349 netdev_warn(priv->netdev,
2350 "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
2351 return -EOPNOTSUPP;
2352 }
2353
2354 if (inner_ecn_mask && inner_ecn_mask != INET_ECN_MASK) {
2355 NL_SET_ERR_MSG_MOD(extack,
2356 "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
2357 netdev_warn(priv->netdev,
2358 "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
2359 return -EOPNOTSUPP;
2360 }
2361
2362 if (!inner_ecn_mask)
2363 return 0;
2364
2365 /* Both inner and outer have full mask on ecn */
2366
2367 if (outer_ecn_key == INET_ECN_ECT_1) {
2368 /* inner ecn might change by DECAP action */
2369
2370 NL_SET_ERR_MSG_MOD(extack, "Match on enc_tos ecn = ECT(1) isn't supported");
2371 netdev_warn(priv->netdev, "Match on enc_tos ecn = ECT(1) isn't supported");
2372 return -EOPNOTSUPP;
2373 }
2374
2375 if (outer_ecn_key != INET_ECN_CE)
2376 return 0;
2377
2378 if (inner_ecn_key != INET_ECN_CE) {
2379 /* Can't happen in software, as packet ecn will be changed to CE after decap */
2380 NL_SET_ERR_MSG_MOD(extack,
2381 "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
2382 netdev_warn(priv->netdev,
2383 "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
2384 return -EOPNOTSUPP;
2385 }
2386
2387 /* outer ecn = CE, inner ecn = CE, as decap will change inner ecn to CE in anycase,
2388 * drop match on inner ecn
2389 */
2390 *match_inner_ecn = false;
2391
2392 return 0;
2393 }
2394
2395 static int parse_tunnel_attr(struct mlx5e_priv *priv,
2396 struct mlx5e_tc_flow *flow,
2397 struct mlx5_flow_spec *spec,
2398 struct flow_cls_offload *f,
2399 struct net_device *filter_dev,
2400 u8 *match_level,
2401 bool *match_inner)
2402 {
2403 struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev);
2404 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2405 struct netlink_ext_ack *extack = f->common.extack;
2406 bool needs_mapping, sets_mapping;
2407 int err;
2408
2409 if (!mlx5e_is_eswitch_flow(flow)) {
2410 NL_SET_ERR_MSG_MOD(extack, "Match on tunnel is not supported");
2411 return -EOPNOTSUPP;
2412 }
2413
2414 needs_mapping = !!flow->attr->chain;
2415 sets_mapping = flow_requires_tunnel_mapping(flow->attr->chain, f);
2416 *match_inner = !needs_mapping;
2417
2418 if ((needs_mapping || sets_mapping) &&
2419 !mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
2420 NL_SET_ERR_MSG_MOD(extack,
2421 "Chains on tunnel devices isn't supported without register loopback support");
2422 netdev_warn(priv->netdev,
2423 "Chains on tunnel devices isn't supported without register loopback support");
2424 return -EOPNOTSUPP;
2425 }
2426
2427 if (!flow->attr->chain) {
2428 err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
2429 match_level);
2430 if (err) {
2431 NL_SET_ERR_MSG_MOD(extack,
2432 "Failed to parse tunnel attributes");
2433 netdev_warn(priv->netdev,
2434 "Failed to parse tunnel attributes");
2435 return err;
2436 }
2437
2438 /* With mpls over udp we decapsulate using packet reformat
2439 * object
2440 */
2441 if (!netif_is_bareudp(filter_dev))
2442 flow->attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
2443 err = mlx5e_tc_set_attr_rx_tun(flow, spec);
2444 if (err)
2445 return err;
2446 } else if (tunnel) {
2447 struct mlx5_flow_spec *tmp_spec;
2448
2449 tmp_spec = kvzalloc(sizeof(*tmp_spec), GFP_KERNEL);
2450 if (!tmp_spec) {
2451 NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory for tunnel tmp spec");
2452 netdev_warn(priv->netdev, "Failed to allocate memory for tunnel tmp spec");
2453 return -ENOMEM;
2454 }
2455 memcpy(tmp_spec, spec, sizeof(*tmp_spec));
2456
2457 err = mlx5e_tc_tun_parse(filter_dev, priv, tmp_spec, f, match_level);
2458 if (err) {
2459 kvfree(tmp_spec);
2460 NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes");
2461 netdev_warn(priv->netdev, "Failed to parse tunnel attributes");
2462 return err;
2463 }
2464 err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec);
2465 kvfree(tmp_spec);
2466 if (err)
2467 return err;
2468 }
2469
2470 if (!needs_mapping && !sets_mapping)
2471 return 0;
2472
2473 return mlx5e_get_flow_tunnel_id(priv, flow, f, filter_dev);
2474 }
2475
2476 static void *get_match_inner_headers_criteria(struct mlx5_flow_spec *spec)
2477 {
2478 return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2479 inner_headers);
2480 }
2481
2482 static void *get_match_inner_headers_value(struct mlx5_flow_spec *spec)
2483 {
2484 return MLX5_ADDR_OF(fte_match_param, spec->match_value,
2485 inner_headers);
2486 }
2487
2488 static void *get_match_outer_headers_criteria(struct mlx5_flow_spec *spec)
2489 {
2490 return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2491 outer_headers);
2492 }
2493
2494 static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec)
2495 {
2496 return MLX5_ADDR_OF(fte_match_param, spec->match_value,
2497 outer_headers);
2498 }
2499
2500 void *mlx5e_get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec)
2501 {
2502 return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
2503 get_match_inner_headers_value(spec) :
2504 get_match_outer_headers_value(spec);
2505 }
2506
2507 void *mlx5e_get_match_headers_criteria(u32 flags, struct mlx5_flow_spec *spec)
2508 {
2509 return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
2510 get_match_inner_headers_criteria(spec) :
2511 get_match_outer_headers_criteria(spec);
2512 }
2513
2514 static int mlx5e_flower_parse_meta(struct net_device *filter_dev,
2515 struct flow_cls_offload *f)
2516 {
2517 struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2518 struct netlink_ext_ack *extack = f->common.extack;
2519 struct net_device *ingress_dev;
2520 struct flow_match_meta match;
2521
2522 if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META))
2523 return 0;
2524
2525 flow_rule_match_meta(rule, &match);
2526
2527 if (match.mask->l2_miss) {
2528 NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on \"l2_miss\"");
2529 return -EOPNOTSUPP;
2530 }
2531
2532 if (!match.mask->ingress_ifindex)
2533 return 0;
2534
2535 if (match.mask->ingress_ifindex != 0xFFFFFFFF) {
2536 NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask");
2537 return -EOPNOTSUPP;
2538 }
2539
2540 ingress_dev = __dev_get_by_index(dev_net(filter_dev),
2541 match.key->ingress_ifindex);
2542 if (!ingress_dev) {
2543 NL_SET_ERR_MSG_MOD(extack,
2544 "Can't find the ingress port to match on");
2545 return -ENOENT;
2546 }
2547
2548 if (ingress_dev != filter_dev) {
2549 NL_SET_ERR_MSG_MOD(extack,
2550 "Can't match on the ingress filter port");
2551 return -EOPNOTSUPP;
2552 }
2553
2554 return 0;
2555 }
2556
2557 static bool skip_key_basic(struct net_device *filter_dev,
2558 struct flow_cls_offload *f)
2559 {
2560 /* When doing mpls over udp decap, the user needs to provide
2561 * MPLS_UC as the protocol in order to be able to match on mpls
2562 * label fields. However, the actual ethertype is IP so we want to
2563 * avoid matching on this, otherwise we'll fail the match.
2564 */
2565 if (netif_is_bareudp(filter_dev) && f->common.chain_index == 0)
2566 return true;
2567
2568 return false;
2569 }
2570
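/* Translate the flower match (dissector keys) into the mlx5 flow spec:
 * tunnel keys go to the outer headers, the remaining keys to the outer or
 * inner headers depending on whether the packet is matched after decap.
 * The deepest matched layer is reported via inner/outer_match_level.
 */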
2571 static int __parse_cls_flower(struct mlx5e_priv *priv,
2572 struct mlx5e_tc_flow *flow,
2573 struct mlx5_flow_spec *spec,
2574 struct flow_cls_offload *f,
2575 struct net_device *filter_dev,
2576 u8 *inner_match_level, u8 *outer_match_level)
2577 {
2578 struct netlink_ext_ack *extack = f->common.extack;
2579 void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2580 outer_headers);
2581 void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2582 outer_headers);
2583 void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2584 misc_parameters);
2585 void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2586 misc_parameters);
2587 void *misc_c_3 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2588 misc_parameters_3);
2589 void *misc_v_3 = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2590 misc_parameters_3);
2591 struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2592 struct flow_dissector *dissector = rule->match.dissector;
2593 enum fs_flow_table_type fs_type;
2594 bool match_inner_ecn = true;
2595 u16 addr_type = 0;
2596 u8 ip_proto = 0;
2597 u8 *match_level;
2598 int err;
2599
2600 fs_type = mlx5e_is_eswitch_flow(flow) ? FS_FT_FDB : FS_FT_NIC_RX;
2601 match_level = outer_match_level;
2602
2603 if (dissector->used_keys &
2604 ~(BIT_ULL(FLOW_DISSECTOR_KEY_META) |
2605 BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) |
2606 BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) |
2607 BIT_ULL(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
2608 BIT_ULL(FLOW_DISSECTOR_KEY_VLAN) |
2609 BIT_ULL(FLOW_DISSECTOR_KEY_CVLAN) |
2610 BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
2611 BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
2612 BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
2613 BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) |
2614 BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
2615 BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
2616 BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS) |
2617 BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
2618 BIT_ULL(FLOW_DISSECTOR_KEY_TCP) |
2619 BIT_ULL(FLOW_DISSECTOR_KEY_IP) |
2620 BIT_ULL(FLOW_DISSECTOR_KEY_CT) |
2621 BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP) |
2622 BIT_ULL(FLOW_DISSECTOR_KEY_ENC_OPTS) |
2623 BIT_ULL(FLOW_DISSECTOR_KEY_ICMP) |
2624 BIT_ULL(FLOW_DISSECTOR_KEY_MPLS))) {
2625 NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
2626 netdev_dbg(priv->netdev, "Unsupported key used: 0x%llx\n",
2627 dissector->used_keys);
2628 return -EOPNOTSUPP;
2629 }
2630
2631 if (mlx5e_get_tc_tun(filter_dev)) {
2632 bool match_inner = false;
2633
2634 err = parse_tunnel_attr(priv, flow, spec, f, filter_dev,
2635 outer_match_level, &match_inner);
2636 if (err)
2637 return err;
2638
2639 if (match_inner) {
2640 /* header pointers should point to the inner headers
2641 * if the packet was decapsulated already.
2642 * outer headers are set by parse_tunnel_attr.
2643 */
2644 match_level = inner_match_level;
2645 headers_c = get_match_inner_headers_criteria(spec);
2646 headers_v = get_match_inner_headers_value(spec);
2647 }
2648
2649 err = mlx5e_tc_verify_tunnel_ecn(priv, f, &match_inner_ecn);
2650 if (err)
2651 return err;
2652 }
2653
2654 err = mlx5e_flower_parse_meta(filter_dev, f);
2655 if (err)
2656 return err;
2657
2658 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC) &&
2659 !skip_key_basic(filter_dev, f)) {
2660 struct flow_match_basic match;
2661
2662 flow_rule_match_basic(rule, &match);
2663 mlx5e_tc_set_ethertype(priv->mdev, &match,
2664 match_level == outer_match_level,
2665 headers_c, headers_v);
2666
2667 if (match.mask->n_proto)
2668 *match_level = MLX5_MATCH_L2;
2669 }
2670 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) ||
2671 is_vlan_dev(filter_dev)) {
2672 struct flow_dissector_key_vlan filter_dev_mask;
2673 struct flow_dissector_key_vlan filter_dev_key;
2674 struct flow_match_vlan match;
2675
2676 if (is_vlan_dev(filter_dev)) {
2677 match.key = &filter_dev_key;
2678 match.key->vlan_id = vlan_dev_vlan_id(filter_dev);
2679 match.key->vlan_tpid = vlan_dev_vlan_proto(filter_dev);
2680 match.key->vlan_priority = 0;
2681 match.mask = &filter_dev_mask;
2682 memset(match.mask, 0xff, sizeof(*match.mask));
2683 match.mask->vlan_priority = 0;
2684 } else {
2685 flow_rule_match_vlan(rule, &match);
2686 }
2687 if (match.mask->vlan_id ||
2688 match.mask->vlan_priority ||
2689 match.mask->vlan_tpid) {
2690 if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
2691 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2692 svlan_tag, 1);
2693 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2694 svlan_tag, 1);
2695 } else {
2696 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2697 cvlan_tag, 1);
2698 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2699 cvlan_tag, 1);
2700 }
2701
2702 MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid,
2703 match.mask->vlan_id);
2704 MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid,
2705 match.key->vlan_id);
2706
2707 MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio,
2708 match.mask->vlan_priority);
2709 MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio,
2710 match.key->vlan_priority);
2711
2712 *match_level = MLX5_MATCH_L2;
2713
2714 if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN) &&
2715 match.mask->vlan_eth_type &&
2716 MLX5_CAP_FLOWTABLE_TYPE(priv->mdev,
2717 ft_field_support.outer_second_vid,
2718 fs_type)) {
2719 MLX5_SET(fte_match_set_misc, misc_c,
2720 outer_second_cvlan_tag, 1);
2721 spec->match_criteria_enable |=
2722 MLX5_MATCH_MISC_PARAMETERS;
2723 }
2724 }
2725 } else if (*match_level != MLX5_MATCH_NONE) {
2726 /* cvlan_tag enabled in match criteria and
2727 * disabled in match value means both S & C tags
2728 * don't exist (untagged of both)
2729 */
2730 MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
2731 *match_level = MLX5_MATCH_L2;
2732 }
2733
2734 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
2735 struct flow_match_vlan match;
2736
2737 flow_rule_match_cvlan(rule, &match);
2738 if (match.mask->vlan_id ||
2739 match.mask->vlan_priority ||
2740 match.mask->vlan_tpid) {
2741 if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ft_field_support.outer_second_vid,
2742 fs_type)) {
2743 NL_SET_ERR_MSG_MOD(extack,
2744 "Matching on CVLAN is not supported");
2745 return -EOPNOTSUPP;
2746 }
2747
2748 if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
2749 MLX5_SET(fte_match_set_misc, misc_c,
2750 outer_second_svlan_tag, 1);
2751 MLX5_SET(fte_match_set_misc, misc_v,
2752 outer_second_svlan_tag, 1);
2753 } else {
2754 MLX5_SET(fte_match_set_misc, misc_c,
2755 outer_second_cvlan_tag, 1);
2756 MLX5_SET(fte_match_set_misc, misc_v,
2757 outer_second_cvlan_tag, 1);
2758 }
2759
2760 MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid,
2761 match.mask->vlan_id);
2762 MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid,
2763 match.key->vlan_id);
2764 MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio,
2765 match.mask->vlan_priority);
2766 MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio,
2767 match.key->vlan_priority);
2768
2769 *match_level = MLX5_MATCH_L2;
2770 spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
2771 }
2772 }
2773
2774 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
2775 struct flow_match_eth_addrs match;
2776
2777 flow_rule_match_eth_addrs(rule, &match);
2778 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2779 dmac_47_16),
2780 match.mask->dst);
2781 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2782 dmac_47_16),
2783 match.key->dst);
2784
2785 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2786 smac_47_16),
2787 match.mask->src);
2788 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2789 smac_47_16),
2790 match.key->src);
2791
2792 if (!is_zero_ether_addr(match.mask->src) ||
2793 !is_zero_ether_addr(match.mask->dst))
2794 *match_level = MLX5_MATCH_L2;
2795 }
2796
2797 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
2798 struct flow_match_control match;
2799
2800 flow_rule_match_control(rule, &match);
2801 addr_type = match.key->addr_type;
2802
2803 /* the HW doesn't support frag first/later */
2804 if (match.mask->flags & FLOW_DIS_FIRST_FRAG) {
2805 NL_SET_ERR_MSG_MOD(extack, "Match on frag first/later is not supported");
2806 return -EOPNOTSUPP;
2807 }
2808
2809 if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) {
2810 MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
2811 MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
2812 match.key->flags & FLOW_DIS_IS_FRAGMENT);
2813
2814 /* the HW doesn't need L3 inline to match on frag=no */
2815 if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT))
2816 *match_level = MLX5_MATCH_L2;
2817 /* *** L2 attributes parsing up to here *** */
2818 else
2819 *match_level = MLX5_MATCH_L3;
2820 }
2821 }
2822
2823 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
2824 struct flow_match_basic match;
2825
2826 flow_rule_match_basic(rule, &match);
2827 ip_proto = match.key->ip_proto;
2828
2829 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
2830 match.mask->ip_proto);
2831 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
2832 match.key->ip_proto);
2833
2834 if (match.mask->ip_proto)
2835 *match_level = MLX5_MATCH_L3;
2836 }
2837
2838 if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
2839 struct flow_match_ipv4_addrs match;
2840
2841 flow_rule_match_ipv4_addrs(rule, &match);
2842 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2843 src_ipv4_src_ipv6.ipv4_layout.ipv4),
2844 &match.mask->src, sizeof(match.mask->src));
2845 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2846 src_ipv4_src_ipv6.ipv4_layout.ipv4),
2847 &match.key->src, sizeof(match.key->src));
2848 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2849 dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2850 &match.mask->dst, sizeof(match.mask->dst));
2851 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2852 dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2853 &match.key->dst, sizeof(match.key->dst));
2854
2855 if (match.mask->src || match.mask->dst)
2856 *match_level = MLX5_MATCH_L3;
2857 }
2858
2859 if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
2860 struct flow_match_ipv6_addrs match;
2861
2862 flow_rule_match_ipv6_addrs(rule, &match);
2863 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2864 src_ipv4_src_ipv6.ipv6_layout.ipv6),
2865 &match.mask->src, sizeof(match.mask->src));
2866 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2867 src_ipv4_src_ipv6.ipv6_layout.ipv6),
2868 &match.key->src, sizeof(match.key->src));
2869
2870 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2871 dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2872 &match.mask->dst, sizeof(match.mask->dst));
2873 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2874 dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2875 &match.key->dst, sizeof(match.key->dst));
2876
2877 if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY ||
2878 ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY)
2879 *match_level = MLX5_MATCH_L3;
2880 }
2881
2882 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
2883 struct flow_match_ip match;
2884
2885 flow_rule_match_ip(rule, &match);
2886 if (match_inner_ecn) {
2887 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
2888 match.mask->tos & 0x3);
2889 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
2890 match.key->tos & 0x3);
2891 }
2892
2893 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
2894 match.mask->tos >> 2);
2895 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
2896 match.key->tos >> 2);
2897
2898 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
2899 match.mask->ttl);
2900 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
2901 match.key->ttl);
2902
2903 if (match.mask->ttl &&
2904 !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
2905 ft_field_support.outer_ipv4_ttl)) {
2906 NL_SET_ERR_MSG_MOD(extack,
2907 "Matching on TTL is not supported");
2908 return -EOPNOTSUPP;
2909 }
2910
2911 if (match.mask->tos || match.mask->ttl)
2912 *match_level = MLX5_MATCH_L3;
2913 }
2914
2915 /* *** L3 attributes parsing up to here *** */
2916
2917 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
2918 struct flow_match_ports match;
2919
2920 flow_rule_match_ports(rule, &match);
2921 switch (ip_proto) {
2922 case IPPROTO_TCP:
2923 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2924 tcp_sport, ntohs(match.mask->src));
2925 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2926 tcp_sport, ntohs(match.key->src));
2927
2928 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2929 tcp_dport, ntohs(match.mask->dst));
2930 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2931 tcp_dport, ntohs(match.key->dst));
2932 break;
2933
2934 case IPPROTO_UDP:
2935 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2936 udp_sport, ntohs(match.mask->src));
2937 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2938 udp_sport, ntohs(match.key->src));
2939
2940 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2941 udp_dport, ntohs(match.mask->dst));
2942 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2943 udp_dport, ntohs(match.key->dst));
2944 break;
2945 default:
2946 NL_SET_ERR_MSG_MOD(extack,
2947 "Only UDP and TCP transports are supported for L4 matching");
2948 netdev_err(priv->netdev,
2949 "Only UDP and TCP transports are supported\n");
2950 return -EINVAL;
2951 }
2952
2953 if (match.mask->src || match.mask->dst)
2954 *match_level = MLX5_MATCH_L4;
2955 }
2956
2957 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
2958 struct flow_match_tcp match;
2959
2960 flow_rule_match_tcp(rule, &match);
2961 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
2962 ntohs(match.mask->flags));
2963 MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
2964 ntohs(match.key->flags));
2965
2966 if (match.mask->flags)
2967 *match_level = MLX5_MATCH_L4;
2968 }
2969 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) {
2970 struct flow_match_icmp match;
2971
2972 flow_rule_match_icmp(rule, &match);
2973 switch (ip_proto) {
2974 case IPPROTO_ICMP:
2975 if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) &
2976 MLX5_FLEX_PROTO_ICMP)) {
2977 NL_SET_ERR_MSG_MOD(extack,
2978 "Match on Flex protocols for ICMP is not supported");
2979 return -EOPNOTSUPP;
2980 }
2981 MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_type,
2982 match.mask->type);
2983 MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_type,
2984 match.key->type);
2985 MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_code,
2986 match.mask->code);
2987 MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_code,
2988 match.key->code);
2989 break;
2990 case IPPROTO_ICMPV6:
2991 if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) &
2992 MLX5_FLEX_PROTO_ICMPV6)) {
2993 NL_SET_ERR_MSG_MOD(extack,
2994 "Match on Flex protocols for ICMPV6 is not supported");
2995 return -EOPNOTSUPP;
2996 }
2997 MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_type,
2998 match.mask->type);
2999 MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_type,
3000 match.key->type);
3001 MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_code,
3002 match.mask->code);
3003 MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_code,
3004 match.key->code);
3005 break;
3006 default:
3007 NL_SET_ERR_MSG_MOD(extack,
3008 "Code and type matching only with ICMP and ICMPv6");
3009 netdev_err(priv->netdev,
3010 "Code and type matching only with ICMP and ICMPv6\n");
3011 return -EINVAL;
3012 }
3013 if (match.mask->code || match.mask->type) {
3014 *match_level = MLX5_MATCH_L4;
3015 spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3;
3016 }
3017 }
3018 /* Currently supported only for MPLS over UDP */
3019 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS) &&
3020 !netif_is_bareudp(filter_dev)) {
3021 NL_SET_ERR_MSG_MOD(extack,
3022 "Matching on MPLS is supported only for MPLS over UDP");
3023 netdev_err(priv->netdev,
3024 "Matching on MPLS is supported only for MPLS over UDP\n");
3025 return -EOPNOTSUPP;
3026 }
3027
3028 return 0;
3029 }
3030
3031 static int parse_cls_flower(struct mlx5e_priv *priv,
3032 struct mlx5e_tc_flow *flow,
3033 struct mlx5_flow_spec *spec,
3034 struct flow_cls_offload *f,
3035 struct net_device *filter_dev)
3036 {
3037 u8 inner_match_level, outer_match_level, non_tunnel_match_level;
3038 struct netlink_ext_ack *extack = f->common.extack;
3039 struct mlx5_core_dev *dev = priv->mdev;
3040 struct mlx5_eswitch *esw = dev->priv.eswitch;
3041 struct mlx5e_rep_priv *rpriv = priv->ppriv;
3042 struct mlx5_eswitch_rep *rep;
3043 bool is_eswitch_flow;
3044 int err;
3045
3046 inner_match_level = MLX5_MATCH_NONE;
3047 outer_match_level = MLX5_MATCH_NONE;
3048
3049 err = __parse_cls_flower(priv, flow, spec, f, filter_dev,
3050 &inner_match_level, &outer_match_level);
3051 non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ?
3052 outer_match_level : inner_match_level;
3053
3054 is_eswitch_flow = mlx5e_is_eswitch_flow(flow);
3055 if (!err && is_eswitch_flow) {
3056 rep = rpriv->rep;
3057 if (rep->vport != MLX5_VPORT_UPLINK &&
3058 (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
3059 esw->offloads.inline_mode < non_tunnel_match_level)) {
3060 NL_SET_ERR_MSG_MOD(extack,
3061 "Flow is not offloaded due to min inline setting");
3062 netdev_warn(priv->netdev,
3063 "Flow is not offloaded due to min inline setting, required %d actual %d\n",
3064 non_tunnel_match_level, esw->offloads.inline_mode);
3065 return -EOPNOTSUPP;
3066 }
3067 }
3068
3069 flow->attr->inner_match_level = inner_match_level;
3070 flow->attr->outer_match_level = outer_match_level;
3071
3072
3073 return err;
3074 }
3075
3076 struct mlx5_fields {
3077 u8 field;
3078 u8 field_bsize;
3079 u32 field_mask;
3080 u32 offset;
3081 u32 match_offset;
3082 };
3083
3084 #define OFFLOAD(fw_field, field_bsize, field_mask, field, off, match_field) \
3085 {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, field_bsize, field_mask, \
3086 offsetof(struct pedit_headers, field) + (off), \
3087 MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)}
3088
3089 /* The value being set already equals the matched value under the masks, and
3090  * the rewrite mask does not cover any bit that the match mask does not.
3091  */
3092 #define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \
3093 type matchmaskx = *(type *)(matchmaskp); \
3094 type matchvalx = *(type *)(matchvalp); \
3095 type maskx = *(type *)(maskp); \
3096 type valx = *(type *)(valp); \
3097 \
3098 (valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \
3099 matchmaskx)); \
3100 })
3101
3102 static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp,
3103 void *matchmaskp, u8 bsize)
3104 {
3105 bool same = false;
3106
3107 switch (bsize) {
3108 case 8:
3109 same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp);
3110 break;
3111 case 16:
3112 same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp);
3113 break;
3114 case 32:
3115 same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp);
3116 break;
3117 }
3118
3119 return same;
3120 }
3121
3122 static struct mlx5_fields fields[] = {
3123 OFFLOAD(DMAC_47_16, 32, U32_MAX, eth.h_dest[0], 0, dmac_47_16),
3124 OFFLOAD(DMAC_15_0, 16, U16_MAX, eth.h_dest[4], 0, dmac_15_0),
3125 OFFLOAD(SMAC_47_16, 32, U32_MAX, eth.h_source[0], 0, smac_47_16),
3126 OFFLOAD(SMAC_15_0, 16, U16_MAX, eth.h_source[4], 0, smac_15_0),
3127 OFFLOAD(ETHERTYPE, 16, U16_MAX, eth.h_proto, 0, ethertype),
3128 OFFLOAD(FIRST_VID, 16, U16_MAX, vlan.h_vlan_TCI, 0, first_vid),
3129
3130 OFFLOAD(IP_DSCP, 8, 0xfc, ip4.tos, 0, ip_dscp),
3131 OFFLOAD(IP_TTL, 8, U8_MAX, ip4.ttl, 0, ttl_hoplimit),
3132 OFFLOAD(SIPV4, 32, U32_MAX, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4),
3133 OFFLOAD(DIPV4, 32, U32_MAX, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
3134
3135 OFFLOAD(SIPV6_127_96, 32, U32_MAX, ip6.saddr.s6_addr32[0], 0,
3136 src_ipv4_src_ipv6.ipv6_layout.ipv6[0]),
3137 OFFLOAD(SIPV6_95_64, 32, U32_MAX, ip6.saddr.s6_addr32[1], 0,
3138 src_ipv4_src_ipv6.ipv6_layout.ipv6[4]),
3139 OFFLOAD(SIPV6_63_32, 32, U32_MAX, ip6.saddr.s6_addr32[2], 0,
3140 src_ipv4_src_ipv6.ipv6_layout.ipv6[8]),
3141 OFFLOAD(SIPV6_31_0, 32, U32_MAX, ip6.saddr.s6_addr32[3], 0,
3142 src_ipv4_src_ipv6.ipv6_layout.ipv6[12]),
3143 OFFLOAD(DIPV6_127_96, 32, U32_MAX, ip6.daddr.s6_addr32[0], 0,
3144 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]),
3145 OFFLOAD(DIPV6_95_64, 32, U32_MAX, ip6.daddr.s6_addr32[1], 0,
3146 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]),
3147 OFFLOAD(DIPV6_63_32, 32, U32_MAX, ip6.daddr.s6_addr32[2], 0,
3148 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]),
3149 OFFLOAD(DIPV6_31_0, 32, U32_MAX, ip6.daddr.s6_addr32[3], 0,
3150 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
3151 OFFLOAD(IPV6_HOPLIMIT, 8, U8_MAX, ip6.hop_limit, 0, ttl_hoplimit),
3152 OFFLOAD(IP_DSCP, 16, 0x0fc0, ip6, 0, ip_dscp),
3153
3154 OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source, 0, tcp_sport),
3155 OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest, 0, tcp_dport),
3156 /* in the linux tcphdr, tcp_flags is 8 bits long */
3157 OFFLOAD(TCP_FLAGS, 8, U8_MAX, tcp.ack_seq, 5, tcp_flags),
3158
3159 OFFLOAD(UDP_SPORT, 16, U16_MAX, udp.source, 0, udp_sport),
3160 OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest, 0, udp_dport),
3161 };
3162
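/* Read a pedit mask for field f in host byte order, limited to the bits the
 * corresponding HW field covers.
 */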
3163 static u32 mask_field_get(void *mask, struct mlx5_fields *f)
3164 {
3165 switch (f->field_bsize) {
3166 case 32:
3167 return be32_to_cpu(*(__be32 *)mask) & f->field_mask;
3168 case 16:
3169 return be16_to_cpu(*(__be16 *)mask) & (u16)f->field_mask;
3170 default:
3171 return *(u8 *)mask & (u8)f->field_mask;
3172 }
3173 }
3174
3175 static void mask_field_clear(void *mask, struct mlx5_fields *f)
3176 {
3177 switch (f->field_bsize) {
3178 case 32:
3179 *(__be32 *)mask &= ~cpu_to_be32(f->field_mask);
3180 break;
3181 case 16:
3182 *(__be16 *)mask &= ~cpu_to_be16((u16)f->field_mask);
3183 break;
3184 default:
3185 *(u8 *)mask &= ~(u8)f->field_mask;
3186 break;
3187 }
3188 }
3189
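/* Walk the supported-fields table and turn the accumulated pedit set/add
 * masks into mlx5 modify-header actions. Rewrites that are redundant (set to
 * the already-matched value, or add of zero) are skipped, and rewrites of
 * non-contiguous sub-fields are rejected.
 */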
3190 static int offload_pedit_fields(struct mlx5e_priv *priv,
3191 int namespace,
3192 struct mlx5e_tc_flow_parse_attr *parse_attr,
3193 u32 *action_flags,
3194 struct netlink_ext_ack *extack)
3195 {
3196 struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
3197 struct pedit_headers_action *hdrs = parse_attr->hdrs;
3198 void *headers_c, *headers_v, *action, *vals_p;
3199 struct mlx5e_tc_mod_hdr_acts *mod_acts;
3200 void *s_masks_p, *a_masks_p;
3201 int i, first, last, next_z;
3202 struct mlx5_fields *f;
3203 unsigned long mask;
3204 u32 s_mask, a_mask;
3205 u8 cmd;
3206
3207 mod_acts = &parse_attr->mod_hdr_acts;
3208 headers_c = mlx5e_get_match_headers_criteria(*action_flags, &parse_attr->spec);
3209 headers_v = mlx5e_get_match_headers_value(*action_flags, &parse_attr->spec);
3210
3211 set_masks = &hdrs[0].masks;
3212 add_masks = &hdrs[1].masks;
3213 set_vals = &hdrs[0].vals;
3214 add_vals = &hdrs[1].vals;
3215
3216 for (i = 0; i < ARRAY_SIZE(fields); i++) {
3217 bool skip;
3218
3219 f = &fields[i];
3220 s_masks_p = (void *)set_masks + f->offset;
3221 a_masks_p = (void *)add_masks + f->offset;
3222
3223 s_mask = mask_field_get(s_masks_p, f);
3224 a_mask = mask_field_get(a_masks_p, f);
3225
3226 if (!s_mask && !a_mask) /* nothing to offload here */
3227 continue;
3228
3229 if (s_mask && a_mask) {
3230 NL_SET_ERR_MSG_MOD(extack,
3231 "can't set and add to the same HW field");
3232 netdev_warn(priv->netdev,
3233 "mlx5: can't set and add to the same HW field (%x)\n",
3234 f->field);
3235 return -EOPNOTSUPP;
3236 }
3237
3238 skip = false;
3239 if (s_mask) {
3240 void *match_mask = headers_c + f->match_offset;
3241 void *match_val = headers_v + f->match_offset;
3242
3243 cmd = MLX5_ACTION_TYPE_SET;
3244 mask = s_mask;
3245 vals_p = (void *)set_vals + f->offset;
3246 /* don't rewrite if we have a match on the same value */
3247 if (cmp_val_mask(vals_p, s_masks_p, match_val,
3248 match_mask, f->field_bsize))
3249 skip = true;
3250 /* clear to denote we consumed this field */
3251 mask_field_clear(s_masks_p, f);
3252 } else {
3253 cmd = MLX5_ACTION_TYPE_ADD;
3254 mask = a_mask;
3255 vals_p = (void *)add_vals + f->offset;
3256 /* add 0 is no change */
3257 if (!mask_field_get(vals_p, f))
3258 skip = true;
3259 /* clear to denote we consumed this field */
3260 mask_field_clear(a_masks_p, f);
3261 }
3262 if (skip)
3263 continue;
3264
3265 first = find_first_bit(&mask, f->field_bsize);
3266 next_z = find_next_zero_bit(&mask, f->field_bsize, first);
3267 last = find_last_bit(&mask, f->field_bsize);
3268 if (first < next_z && next_z < last) {
3269 NL_SET_ERR_MSG_MOD(extack,
3270 "rewrite of few sub-fields isn't supported");
3271 netdev_warn(priv->netdev,
3272 "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n",
3273 mask);
3274 return -EOPNOTSUPP;
3275 }
3276
3277 action = mlx5e_mod_hdr_alloc(priv->mdev, namespace, mod_acts);
3278 if (IS_ERR(action)) {
3279 NL_SET_ERR_MSG_MOD(extack,
3280 "too many pedit actions, can't offload");
3281 mlx5_core_warn(priv->mdev,
3282 "mlx5: parsed %d pedit actions, can't do more\n",
3283 mod_acts->num_actions);
3284 return PTR_ERR(action);
3285 }
3286
3287 MLX5_SET(set_action_in, action, action_type, cmd);
3288 MLX5_SET(set_action_in, action, field, f->field);
3289
3290 if (cmd == MLX5_ACTION_TYPE_SET) {
3291 unsigned long field_mask = f->field_mask;
3292 int start;
3293
3294 /* a bit-sized field (e.g. DSCP) may not start at bit 0 of its word */
3295 start = find_first_bit(&field_mask, f->field_bsize);
3296
3297 MLX5_SET(set_action_in, action, offset, first - start);
3298 /* length is num of bits to be written, zero means length of 32 */
3299 MLX5_SET(set_action_in, action, length, (last - first + 1));
3300 }
3301
3302 if (f->field_bsize == 32)
3303 MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first);
3304 else if (f->field_bsize == 16)
3305 MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first);
3306 else if (f->field_bsize == 8)
3307 MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first);
3308
3309 ++mod_acts->num_actions;
3310 }
3311
3312 return 0;
3313 }
3314
3315 static const struct pedit_headers zero_masks = {};
3316
3317 static int verify_offload_pedit_fields(struct mlx5e_priv *priv,
3318 struct mlx5e_tc_flow_parse_attr *parse_attr,
3319 struct netlink_ext_ack *extack)
3320 {
3321 struct pedit_headers *cmd_masks;
3322 u8 cmd;
3323
3324 for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
3325 cmd_masks = &parse_attr->hdrs[cmd].masks;
3326 if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
3327 NL_SET_ERR_MSG_MOD(extack, "attempt to offload an unsupported field");
3328 netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
3329 print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
3330 16, 1, cmd_masks, sizeof(zero_masks), true);
3331 return -EOPNOTSUPP;
3332 }
3333 }
3334
3335 return 0;
3336 }
3337
3338 static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
3339 struct mlx5e_tc_flow_parse_attr *parse_attr,
3340 u32 *action_flags,
3341 struct netlink_ext_ack *extack)
3342 {
3343 int err;
3344
3345 err = offload_pedit_fields(priv, namespace, parse_attr, action_flags, extack);
3346 if (err)
3347 goto out_dealloc_parsed_actions;
3348
3349 err = verify_offload_pedit_fields(priv, parse_attr, extack);
3350 if (err)
3351 goto out_dealloc_parsed_actions;
3352
3353 return 0;
3354
3355 out_dealloc_parsed_actions:
3356 mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
3357 return err;
3358 }
3359
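/* Overlays of the 4-byte word that holds ttl (IPv4) and hop_limit (IPv6),
 * used below to tell whether a mangle touches only those fields.
 */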
3360 struct ip_ttl_word {
3361 __u8 ttl;
3362 __u8 protocol;
3363 __sum16 check;
3364 };
3365
3366 struct ipv6_hoplimit_word {
3367 __be16 payload_len;
3368 __u8 nexthdr;
3369 __u8 hop_limit;
3370 };
3371
3372 static bool
3373 is_flow_action_modify_ip_header(struct flow_action *flow_action)
3374 {
3375 const struct flow_action_entry *act;
3376 u32 mask, offset;
3377 u8 htype;
3378 int i;
3379
3380 /* For the IPv4 and IPv6 headers, check the 4-byte word that
3381 * contains ttl/hop_limit, to determine whether any field other
3382 * than ttl/hop_limit is being modified.
3383 */
3384 flow_action_for_each(i, act, flow_action) {
3385 if (act->id != FLOW_ACTION_MANGLE &&
3386 act->id != FLOW_ACTION_ADD)
3387 continue;
3388
3389 htype = act->mangle.htype;
3390 offset = act->mangle.offset;
3391 mask = ~act->mangle.mask;
3392
3393 if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) {
3394 struct ip_ttl_word *ttl_word =
3395 (struct ip_ttl_word *)&mask;
3396
3397 if (offset != offsetof(struct iphdr, ttl) ||
3398 ttl_word->protocol ||
3399 ttl_word->check)
3400 return true;
3401 } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
3402 struct ipv6_hoplimit_word *hoplimit_word =
3403 (struct ipv6_hoplimit_word *)&mask;
3404
3405 if (offset != offsetof(struct ipv6hdr, payload_len) ||
3406 hoplimit_word->payload_len ||
3407 hoplimit_word->nexthdr)
3408 return true;
3409 }
3410 }
3411
3412 return false;
3413 }
3414
3415 static bool modify_header_match_supported(struct mlx5e_priv *priv,
3416 struct mlx5_flow_spec *spec,
3417 struct flow_action *flow_action,
3418 u32 actions,
3419 struct netlink_ext_ack *extack)
3420 {
3421 bool modify_ip_header;
3422 void *headers_c;
3423 void *headers_v;
3424 u16 ethertype;
3425 u8 ip_proto;
3426
3427 headers_c = mlx5e_get_match_headers_criteria(actions, spec);
3428 headers_v = mlx5e_get_match_headers_value(actions, spec);
3429 ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
3430
3431 /* for non-IP we only re-write MACs, so we're okay */
3432 if (MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_version) == 0 &&
3433 ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
3434 goto out_ok;
3435
3436 modify_ip_header = is_flow_action_modify_ip_header(flow_action);
3437 ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
3438 if (modify_ip_header && ip_proto != IPPROTO_TCP &&
3439 ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
3440 NL_SET_ERR_MSG_MOD(extack,
3441 "can't offload re-write of non TCP/UDP");
3442 netdev_info(priv->netdev, "can't offload re-write of ip proto %d\n",
3443 ip_proto);
3444 return false;
3445 }
3446
3447 out_ok:
3448 return true;
3449 }
3450
3451 static bool
3452 actions_match_supported_fdb(struct mlx5e_priv *priv,
3453 struct mlx5e_tc_flow *flow,
3454 struct netlink_ext_ack *extack)
3455 {
3456 struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
3457
3458 if (esw_attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
3459 NL_SET_ERR_MSG_MOD(extack,
3460 "current firmware doesn't support split rule for port mirroring");
3461 netdev_warn_once(priv->netdev,
3462 "current firmware doesn't support split rule for port mirroring\n");
3463 return false;
3464 }
3465
3466 return true;
3467 }
3468
3469 static bool
3470 actions_match_supported(struct mlx5e_priv *priv,
3471 struct flow_action *flow_action,
3472 u32 actions,
3473 struct mlx5e_tc_flow_parse_attr *parse_attr,
3474 struct mlx5e_tc_flow *flow,
3475 struct netlink_ext_ack *extack)
3476 {
3477 if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
3478 !modify_header_match_supported(priv, &parse_attr->spec, flow_action, actions,
3479 extack))
3480 return false;
3481
3482 if (mlx5e_is_eswitch_flow(flow) &&
3483 !actions_match_supported_fdb(priv, flow, extack))
3484 return false;
3485
3486 return true;
3487 }
3488
3489 static bool same_port_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
3490 {
3491 return priv->mdev == peer_priv->mdev;
3492 }
3493
3494 bool mlx5e_same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
3495 {
3496 struct mlx5_core_dev *fmdev, *pmdev;
3497 u64 fsystem_guid, psystem_guid;
3498
3499 fmdev = priv->mdev;
3500 pmdev = peer_priv->mdev;
3501
3502 fsystem_guid = mlx5_query_nic_system_image_guid(fmdev);
3503 psystem_guid = mlx5_query_nic_system_image_guid(pmdev);
3504
3505 return (fsystem_guid == psystem_guid);
3506 }
3507
3508 static int
3509 actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv,
3510 struct mlx5e_tc_flow *flow,
3511 struct mlx5_flow_attr *attr,
3512 struct netlink_ext_ack *extack)
3513 {
3514 struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
3515 struct pedit_headers_action *hdrs = parse_attr->hdrs;
3516 enum mlx5_flow_namespace_type ns_type;
3517 int err;
3518
3519 if (!hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits &&
3520 !hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits)
3521 return 0;
3522
3523 ns_type = mlx5e_get_flow_namespace(flow);
3524
3525 err = alloc_tc_pedit_action(priv, ns_type, parse_attr, &attr->action, extack);
3526 if (err)
3527 return err;
3528
3529 if (parse_attr->mod_hdr_acts.num_actions > 0)
3530 return 0;
3531
3532 /* In case all pedit actions are skipped, remove the MOD_HDR flag. */
3533 attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3534 mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
3535
3536 if (ns_type != MLX5_FLOW_NAMESPACE_FDB)
3537 return 0;
3538
3539 if (!((attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
3540 (attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)))
3541 attr->esw_attr->split_count = 0;
3542
3543 return 0;
3544 }
3545
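/* Clone a flow attribute for use in a post-action table: copy the original
 * attr and its filter_dev, but reset everything that is private to a single
 * rule (action flags, counter, dest chain/ft, CT state, branch pointers).
 */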
3546 static struct mlx5_flow_attr*
3547 mlx5e_clone_flow_attr_for_post_act(struct mlx5_flow_attr *attr,
3548 enum mlx5_flow_namespace_type ns_type)
3549 {
3550 struct mlx5e_tc_flow_parse_attr *parse_attr;
3551 u32 attr_sz = ns_to_attr_sz(ns_type);
3552 struct mlx5_flow_attr *attr2;
3553
3554 attr2 = mlx5_alloc_flow_attr(ns_type);
3555 parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
3556 if (!attr2 || !parse_attr) {
3557 kvfree(parse_attr);
3558 kfree(attr2);
3559 return NULL;
3560 }
3561
3562 memcpy(attr2, attr, attr_sz);
3563 INIT_LIST_HEAD(&attr2->list);
3564 parse_attr->filter_dev = attr->parse_attr->filter_dev;
3565 attr2->action = 0;
3566 attr2->counter = NULL;
3567 attr2->tc_act_cookies_count = 0;
3568 attr2->flags = 0;
3569 attr2->parse_attr = parse_attr;
3570 attr2->dest_chain = 0;
3571 attr2->dest_ft = NULL;
3572 attr2->act_id_restore_rule = NULL;
3573 memset(&attr2->ct_attr, 0, sizeof(attr2->ct_attr));
3574
3575 if (ns_type == MLX5_FLOW_NAMESPACE_FDB) {
3576 attr2->esw_attr->out_count = 0;
3577 attr2->esw_attr->split_count = 0;
3578 }
3579
3580 attr2->branch_true = NULL;
3581 attr2->branch_false = NULL;
3582 attr2->jumping_attr = NULL;
3583 return attr2;
3584 }
3585
3586 struct mlx5_flow_attr *
3587 mlx5e_tc_get_encap_attr(struct mlx5e_tc_flow *flow)
3588 {
3589 struct mlx5_esw_flow_attr *esw_attr;
3590 struct mlx5_flow_attr *attr;
3591 int i;
3592
3593 list_for_each_entry(attr, &flow->attrs, list) {
3594 esw_attr = attr->esw_attr;
3595 for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
3596 if (esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP)
3597 return attr;
3598 }
3599 }
3600
3601 return NULL;
3602 }
3603
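/* The last entry in flow->attrs is the flow's original attribute and is
 * offloaded/unoffloaded by the caller; only the cloned post-action
 * attributes are handled by the helpers below.
 */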
3604 void
3605 mlx5e_tc_unoffload_flow_post_acts(struct mlx5e_tc_flow *flow)
3606 {
3607 struct mlx5e_post_act *post_act = get_post_action(flow->priv);
3608 struct mlx5_flow_attr *attr;
3609
3610 list_for_each_entry(attr, &flow->attrs, list) {
3611 if (list_is_last(&attr->list, &flow->attrs))
3612 break;
3613
3614 mlx5e_tc_post_act_unoffload(post_act, attr->post_act_handle);
3615 }
3616 }
3617
3618 static void
3619 free_flow_post_acts(struct mlx5e_tc_flow *flow)
3620 {
3621 struct mlx5_flow_attr *attr, *tmp;
3622
3623 list_for_each_entry_safe(attr, tmp, &flow->attrs, list) {
3624 if (list_is_last(&attr->list, &flow->attrs))
3625 break;
3626
3627 mlx5_free_flow_attr_actions(flow, attr);
3628
3629 list_del(&attr->list);
3630 kvfree(attr->parse_attr);
3631 kfree(attr);
3632 }
3633 }
3634
3635 int
3636 mlx5e_tc_offload_flow_post_acts(struct mlx5e_tc_flow *flow)
3637 {
3638 struct mlx5e_post_act *post_act = get_post_action(flow->priv);
3639 struct mlx5_flow_attr *attr;
3640 int err = 0;
3641
3642 list_for_each_entry(attr, &flow->attrs, list) {
3643 if (list_is_last(&attr->list, &flow->attrs))
3644 break;
3645
3646 err = mlx5e_tc_post_act_offload(post_act, attr->post_act_handle);
3647 if (err)
3648 break;
3649 }
3650
3651 return err;
3652 }
3653
3654 /* TC filter rule HW translation:
3655 *
3656 * +---------------------+
3657 * + ft prio (tc chain) +
3658 * + original match +
3659 * +---------------------+
3660 * |
3661 * | if multi table action
3662 * |
3663 * v
3664 * +---------------------+
3665 * + post act ft |<----.
3666 * + match fte id | | split on multi table action
3667 * + do actions |-----'
3668 * +---------------------+
3669 * |
3670 * |
3671 * v
3672 * Do rest of the actions after last multi table action.
3673 */
3674 static int
3675 alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack)
3676 {
3677 struct mlx5e_post_act *post_act = get_post_action(flow->priv);
3678 struct mlx5_flow_attr *attr, *next_attr = NULL;
3679 struct mlx5e_post_act_handle *handle;
3680 int err;
3681
3682 /* The attrs list is walked in reverse order of the actions, as needed:
3683 * the first entry is the last attribute.
3684 */
3685 list_for_each_entry(attr, &flow->attrs, list) {
3686 if (!next_attr) {
3687 /* Set counter action on last post act rule. */
3688 attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
3689 }
3690
3691 if (next_attr && !(attr->flags & MLX5_ATTR_FLAG_TERMINATING)) {
3692 err = mlx5e_tc_act_set_next_post_act(flow, attr, next_attr);
3693 if (err)
3694 goto out_free;
3695 }
3696
3697 /* Don't add post_act rule for first attr (last in the list).
3698 * It's being handled by the caller.
3699 */
3700 if (list_is_last(&attr->list, &flow->attrs))
3701 break;
3702
3703 err = actions_prepare_mod_hdr_actions(flow->priv, flow, attr, extack);
3704 if (err)
3705 goto out_free;
3706
3707 err = post_process_attr(flow, attr, extack);
3708 if (err)
3709 goto out_free;
3710
3711 handle = mlx5e_tc_post_act_add(post_act, attr);
3712 if (IS_ERR(handle)) {
3713 err = PTR_ERR(handle);
3714 goto out_free;
3715 }
3716
3717 attr->post_act_handle = handle;
3718
3719 if (attr->jumping_attr) {
3720 err = mlx5e_tc_act_set_next_post_act(flow, attr->jumping_attr, attr);
3721 if (err)
3722 goto out_free;
3723 }
3724
3725 next_attr = attr;
3726 }
3727
3728 if (flow_flag_test(flow, SLOW))
3729 goto out;
3730
3731 err = mlx5e_tc_offload_flow_post_acts(flow);
3732 if (err)
3733 goto out_free;
3734
3735 out:
3736 return 0;
3737
3738 out_free:
3739 free_flow_post_acts(flow);
3740 return err;
3741 }
3742
3743 static int
3744 set_branch_dest_ft(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr)
3745 {
3746 struct mlx5e_post_act *post_act = get_post_action(priv);
3747
3748 if (IS_ERR(post_act))
3749 return PTR_ERR(post_act);
3750
3751 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3752 attr->dest_ft = mlx5e_tc_post_act_get_ft(post_act);
3753
3754 return 0;
3755 }
3756
3757 static int
3758 alloc_branch_attr(struct mlx5e_tc_flow *flow,
3759 struct mlx5e_tc_act_branch_ctrl *cond,
3760 struct mlx5_flow_attr **cond_attr,
3761 u32 *jump_count,
3762 struct netlink_ext_ack *extack)
3763 {
3764 struct mlx5_flow_attr *attr;
3765 int err = 0;
3766
3767 *cond_attr = mlx5e_clone_flow_attr_for_post_act(flow->attr,
3768 mlx5e_get_flow_namespace(flow));
3769 if (!(*cond_attr))
3770 return -ENOMEM;
3771
3772 attr = *cond_attr;
3773
3774 switch (cond->act_id) {
3775 case FLOW_ACTION_DROP:
3776 attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
3777 break;
3778 case FLOW_ACTION_ACCEPT:
3779 case FLOW_ACTION_PIPE:
3780 err = set_branch_dest_ft(flow->priv, attr);
3781 if (err)
3782 goto out_err;
3783 break;
3784 case FLOW_ACTION_JUMP:
3785 if (*jump_count) {
3786 NL_SET_ERR_MSG_MOD(extack, "Cannot offload flows with nested jumps");
3787 err = -EOPNOTSUPP;
3788 goto out_err;
3789 }
3790 *jump_count = cond->extval;
3791 err = set_branch_dest_ft(flow->priv, attr);
3792 if (err)
3793 goto out_err;
3794 break;
3795 default:
3796 err = -EOPNOTSUPP;
3797 goto out_err;
3798 }
3799
3800 return err;
3801 out_err:
3802 kfree(*cond_attr);
3803 *cond_attr = NULL;
3804 return err;
3805 }
3806
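/* Track how many actions remain to be jumped over. When the count drops to 1
 * the current action is the last one inside the jump scope and a new
 * attribute is started right after it; when it drops to 0 the current
 * attribute is the jump target and is linked back to the jumping attribute.
 */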
3807 static void
3808 dec_jump_count(struct flow_action_entry *act, struct mlx5e_tc_act *tc_act,
3809 struct mlx5_flow_attr *attr, struct mlx5e_priv *priv,
3810 struct mlx5e_tc_jump_state *jump_state)
3811 {
3812 if (!jump_state->jump_count)
3813 return;
3814
3815 /* A single tc action can instantiate multiple offload actions (e.g. pedit).
3816 * Count the jump only once per tc action.
3817 */
3818 if (act->id == jump_state->last_id && act->hw_index == jump_state->last_index)
3819 return;
3820
3821 jump_state->last_id = act->id;
3822 jump_state->last_index = act->hw_index;
3823
3824 /* nothing to do for intermediate actions */
3825 if (--jump_state->jump_count > 1)
3826 return;
3827
3828 if (jump_state->jump_count == 1) { /* last action in the jump action list */
3829
3830 /* create a new attribute after this action */
3831 jump_state->jump_target = true;
3832
3833 if (tc_act->is_terminating_action) { /* the branch ends here */
3834 attr->flags |= MLX5_ATTR_FLAG_TERMINATING;
3835 attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
3836 } else { /* the branch continues executing the rest of the actions */
3837 struct mlx5e_post_act *post_act;
3838
3839 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3840 post_act = get_post_action(priv);
3841 attr->dest_ft = mlx5e_tc_post_act_get_ft(post_act);
3842 }
3843 } else if (jump_state->jump_count == 0) { /* first attr after the jump action list */
3844 /* This is the post action for the jumping attribute (either red or green)
3845 * Use the stored jumping_attr to set the post act id on the jumping attribute
3846 */
3847 attr->jumping_attr = jump_state->jumping_attr;
3848 }
3849 }
3850
3851 static int
3852 parse_branch_ctrl(struct flow_action_entry *act, struct mlx5e_tc_act *tc_act,
3853 struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr,
3854 struct mlx5e_tc_jump_state *jump_state,
3855 struct netlink_ext_ack *extack)
3856 {
3857 struct mlx5e_tc_act_branch_ctrl cond_true, cond_false;
3858 u32 jump_count = jump_state->jump_count;
3859 int err;
3860
3861 if (!tc_act->get_branch_ctrl)
3862 return 0;
3863
3864 tc_act->get_branch_ctrl(act, &cond_true, &cond_false);
3865
3866 err = alloc_branch_attr(flow, &cond_true,
3867 &attr->branch_true, &jump_count, extack);
3868 if (err)
3869 goto out_err;
3870
3871 if (jump_count)
3872 jump_state->jumping_attr = attr->branch_true;
3873
3874 err = alloc_branch_attr(flow, &cond_false,
3875 &attr->branch_false, &jump_count, extack);
3876 if (err)
3877 goto err_branch_false;
3878
3879 if (jump_count && !jump_state->jumping_attr)
3880 jump_state->jumping_attr = attr->branch_false;
3881
3882 jump_state->jump_count = jump_count;
3883
3884 /* branching action requires its own counter */
3885 attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
3886 flow_flag_set(flow, USE_ACT_STATS);
3887
3888 return 0;
3889
3890 err_branch_false:
3891 free_branch_attr(flow, attr->branch_true);
3892 out_err:
3893 return err;
3894 }
3895
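/* Walk the flow actions, dispatching each one to its mlx5e_tc_act handler.
 * Whenever a jump target is reached, or a multi-table action is not the last
 * action, the current attribute is finalized and a new one is cloned so the
 * remaining actions land in a post-action table.
 */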
3896 static int
3897 parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state,
3898 struct flow_action *flow_action)
3899 {
3900 struct netlink_ext_ack *extack = parse_state->extack;
3901 struct mlx5e_tc_flow *flow = parse_state->flow;
3902 struct mlx5e_tc_jump_state jump_state = {};
3903 struct mlx5_flow_attr *attr = flow->attr;
3904 enum mlx5_flow_namespace_type ns_type;
3905 struct mlx5e_priv *priv = flow->priv;
3906 struct mlx5_flow_attr *prev_attr;
3907 struct flow_action_entry *act;
3908 struct mlx5e_tc_act *tc_act;
3909 int err, i, i_split = 0;
3910 bool is_missable;
3911
3912 ns_type = mlx5e_get_flow_namespace(flow);
3913 list_add(&attr->list, &flow->attrs);
3914
3915 flow_action_for_each(i, act, flow_action) {
3916 jump_state.jump_target = false;
3917 is_missable = false;
3918 prev_attr = attr;
3919
3920 tc_act = mlx5e_tc_act_get(act->id, ns_type);
3921 if (!tc_act) {
3922 NL_SET_ERR_MSG_MOD(extack, "Not implemented offload action");
3923 err = -EOPNOTSUPP;
3924 goto out_free_post_acts;
3925 }
3926
3927 if (tc_act->can_offload && !tc_act->can_offload(parse_state, act, i, attr)) {
3928 err = -EOPNOTSUPP;
3929 goto out_free_post_acts;
3930 }
3931
3932 err = tc_act->parse_action(parse_state, act, priv, attr);
3933 if (err)
3934 goto out_free_post_acts;
3935
3936 dec_jump_count(act, tc_act, attr, priv, &jump_state);
3937
3938 err = parse_branch_ctrl(act, tc_act, flow, attr, &jump_state, extack);
3939 if (err)
3940 goto out_free_post_acts;
3941
3942 parse_state->actions |= attr->action;
3943
3944 /* Split attr for multi table act if not the last act. */
3945 if (jump_state.jump_target ||
3946 (tc_act->is_multi_table_act &&
3947 tc_act->is_multi_table_act(priv, act, attr) &&
3948 i < flow_action->num_entries - 1)) {
3949 is_missable = tc_act->is_missable ? tc_act->is_missable(act) : false;
3950
3951 err = mlx5e_tc_act_post_parse(parse_state, flow_action, i_split, i, attr,
3952 ns_type);
3953 if (err)
3954 goto out_free_post_acts;
3955
3956 attr = mlx5e_clone_flow_attr_for_post_act(flow->attr, ns_type);
3957 if (!attr) {
3958 err = -ENOMEM;
3959 goto out_free_post_acts;
3960 }
3961
3962 i_split = i + 1;
3963 parse_state->if_count = 0;
3964 list_add(&attr->list, &flow->attrs);
3965 }
3966
3967 if (is_missable) {
3968 /* Add counter to prev, and assign act to new (next) attr */
3969 prev_attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
3970 flow_flag_set(flow, USE_ACT_STATS);
3971
3972 attr->tc_act_cookies[attr->tc_act_cookies_count++] = act->cookie;
3973 } else if (!tc_act->stats_action) {
3974 prev_attr->tc_act_cookies[prev_attr->tc_act_cookies_count++] = act->cookie;
3975 }
3976 }
3977
3978 err = mlx5e_tc_act_post_parse(parse_state, flow_action, i_split, i, attr, ns_type);
3979 if (err)
3980 goto out_free_post_acts;
3981
3982 err = alloc_flow_post_acts(flow, extack);
3983 if (err)
3984 goto out_free_post_acts;
3985
3986 return 0;
3987
3988 out_free_post_acts:
3989 free_flow_post_acts(flow);
3990
3991 return err;
3992 }
3993
3994 static int
3995 flow_action_supported(struct flow_action *flow_action,
3996 struct netlink_ext_ack *extack)
3997 {
3998 if (!flow_action_has_entries(flow_action)) {
3999 NL_SET_ERR_MSG_MOD(extack, "Flow action doesn't have any entries");
4000 return -EINVAL;
4001 }
4002
4003 if (!flow_action_hw_stats_check(flow_action, extack,
4004 FLOW_ACTION_HW_STATS_DELAYED_BIT)) {
4005 NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported");
4006 return -EOPNOTSUPP;
4007 }
4008
4009 return 0;
4010 }
4011
4012 static int
4013 parse_tc_nic_actions(struct mlx5e_priv *priv,
4014 struct flow_action *flow_action,
4015 struct mlx5e_tc_flow *flow,
4016 struct netlink_ext_ack *extack)
4017 {
4018 struct mlx5e_tc_act_parse_state *parse_state;
4019 struct mlx5e_tc_flow_parse_attr *parse_attr;
4020 struct mlx5_flow_attr *attr = flow->attr;
4021 int err;
4022
4023 err = flow_action_supported(flow_action, extack);
4024 if (err)
4025 return err;
4026
4027 attr->nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
4028 parse_attr = attr->parse_attr;
4029 parse_state = &parse_attr->parse_state;
4030 mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack);
4031 parse_state->ct_priv = get_ct_priv(priv);
4032
4033 err = parse_tc_actions(parse_state, flow_action);
4034 if (err)
4035 return err;
4036
4037 err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack);
4038 if (err)
4039 return err;
4040
4041 err = verify_attr_actions(attr->action, extack);
4042 if (err)
4043 return err;
4044
4045 if (!actions_match_supported(priv, flow_action, parse_state->actions,
4046 parse_attr, flow, extack))
4047 return -EOPNOTSUPP;
4048
4049 return 0;
4050 }
4051
4052 static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv,
4053 struct net_device *peer_netdev)
4054 {
4055 struct mlx5e_priv *peer_priv;
4056
4057 peer_priv = netdev_priv(peer_netdev);
4058
4059 return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) &&
4060 mlx5e_eswitch_vf_rep(priv->netdev) &&
4061 mlx5e_eswitch_vf_rep(peer_netdev) &&
4062 mlx5e_same_hw_devs(priv, peer_priv));
4063 }
4064
4065 static bool same_hw_reps(struct mlx5e_priv *priv,
4066 struct net_device *peer_netdev)
4067 {
4068 struct mlx5e_priv *peer_priv;
4069
4070 peer_priv = netdev_priv(peer_netdev);
4071
4072 return mlx5e_eswitch_rep(priv->netdev) &&
4073 mlx5e_eswitch_rep(peer_netdev) &&
4074 mlx5e_same_hw_devs(priv, peer_priv);
4075 }
4076
4077 static bool is_lag_dev(struct mlx5e_priv *priv,
4078 struct net_device *peer_netdev)
4079 {
4080 return ((mlx5_lag_is_sriov(priv->mdev) ||
4081 mlx5_lag_is_multipath(priv->mdev)) &&
4082 same_hw_reps(priv, peer_netdev));
4083 }
4084
4085 static bool is_multiport_eligible(struct mlx5e_priv *priv, struct net_device *out_dev)
4086 {
4087 return same_hw_reps(priv, out_dev) && mlx5_lag_is_mpesw(priv->mdev);
4088 }
4089
4090 bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
4091 struct net_device *out_dev)
4092 {
4093 if (is_merged_eswitch_vfs(priv, out_dev))
4094 return true;
4095
4096 if (is_multiport_eligible(priv, out_dev))
4097 return true;
4098
4099 if (is_lag_dev(priv, out_dev))
4100 return true;
4101
4102 return mlx5e_eswitch_rep(out_dev) &&
4103 same_port_devs(priv, netdev_priv(out_dev));
4104 }
4105
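/* Redirect to an internal (ovs) port: write the internal port's metadata
 * into the vport register via a modify-header action and send the packet
 * back to the root FDB (chain 0) so it is re-matched with the new source.
 */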
4106 int mlx5e_set_fwd_to_int_port_actions(struct mlx5e_priv *priv,
4107 struct mlx5_flow_attr *attr,
4108 int ifindex,
4109 enum mlx5e_tc_int_port_type type,
4110 u32 *action,
4111 int out_index)
4112 {
4113 struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
4114 struct mlx5e_tc_int_port_priv *int_port_priv;
4115 struct mlx5e_tc_flow_parse_attr *parse_attr;
4116 struct mlx5e_tc_int_port *dest_int_port;
4117 int err;
4118
4119 parse_attr = attr->parse_attr;
4120 int_port_priv = mlx5e_get_int_port_priv(priv);
4121
4122 dest_int_port = mlx5e_tc_int_port_get(int_port_priv, ifindex, type);
4123 if (IS_ERR(dest_int_port))
4124 return PTR_ERR(dest_int_port);
4125
4126 err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts,
4127 MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG,
4128 mlx5e_tc_int_port_get_metadata(dest_int_port));
4129 if (err) {
4130 mlx5e_tc_int_port_put(int_port_priv, dest_int_port);
4131 return err;
4132 }
4133
4134 *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
4135
4136 esw_attr->dest_int_port = dest_int_port;
4137 esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
4138 esw_attr->split_count = out_index;
4139
4140 /* Forward to root fdb for matching against the new source vport */
4141 attr->dest_chain = 0;
4142
4143 return 0;
4144 }
4145
4146 static int
4147 parse_tc_fdb_actions(struct mlx5e_priv *priv,
4148 struct flow_action *flow_action,
4149 struct mlx5e_tc_flow *flow,
4150 struct netlink_ext_ack *extack)
4151 {
4152 struct mlx5e_tc_act_parse_state *parse_state;
4153 struct mlx5e_tc_flow_parse_attr *parse_attr;
4154 struct mlx5_flow_attr *attr = flow->attr;
4155 struct mlx5_esw_flow_attr *esw_attr;
4156 struct net_device *filter_dev;
4157 int err;
4158
4159 err = flow_action_supported(flow_action, extack);
4160 if (err)
4161 return err;
4162
4163 esw_attr = attr->esw_attr;
4164 parse_attr = attr->parse_attr;
4165 filter_dev = parse_attr->filter_dev;
4166 parse_state = &parse_attr->parse_state;
4167 mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack);
4168 parse_state->ct_priv = get_ct_priv(priv);
4169
4170 err = parse_tc_actions(parse_state, flow_action);
4171 if (err)
4172 return err;
4173
4174 /* Forward to/from internal port can only have 1 dest */
4175 if ((netif_is_ovs_master(filter_dev) || esw_attr->dest_int_port) &&
4176 esw_attr->out_count > 1) {
4177 NL_SET_ERR_MSG_MOD(extack,
4178 "Rules with internal port can have only one destination");
4179 return -EOPNOTSUPP;
4180 }
4181
4182 /* Forward from tunnel/internal port to internal port is not supported */
4183 if ((mlx5e_get_tc_tun(filter_dev) || netif_is_ovs_master(filter_dev)) &&
4184 esw_attr->dest_int_port) {
4185 NL_SET_ERR_MSG_MOD(extack,
4186 "Forwarding from tunnel/internal port to internal port is not supported");
4187 return -EOPNOTSUPP;
4188 }
4189
4190 err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack);
4191 if (err)
4192 return err;
4193
4194 if (!actions_match_supported(priv, flow_action, parse_state->actions,
4195 parse_attr, flow, extack))
4196 return -EOPNOTSUPP;
4197
4198 return 0;
4199 }
4200
4201 static void get_flags(int flags, unsigned long *flow_flags)
4202 {
4203 unsigned long __flow_flags = 0;
4204
4205 if (flags & MLX5_TC_FLAG(INGRESS))
4206 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS);
4207 if (flags & MLX5_TC_FLAG(EGRESS))
4208 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS);
4209
4210 if (flags & MLX5_TC_FLAG(ESW_OFFLOAD))
4211 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
4212 if (flags & MLX5_TC_FLAG(NIC_OFFLOAD))
4213 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
4214 if (flags & MLX5_TC_FLAG(FT_OFFLOAD))
4215 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_FT);
4216
4217 *flow_flags = __flow_flags;
4218 }
4219
4220 static const struct rhashtable_params tc_ht_params = {
4221 .head_offset = offsetof(struct mlx5e_tc_flow, node),
4222 .key_offset = offsetof(struct mlx5e_tc_flow, cookie),
4223 .key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie),
4224 .automatic_shrinking = true,
4225 };
4226
4227 static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv,
4228 unsigned long flags)
4229 {
4230 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
4231 struct mlx5e_rep_priv *rpriv;
4232
4233 if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) {
4234 rpriv = priv->ppriv;
4235 return &rpriv->tc_ht;
4236 } else /* NIC offload */
4237 return &tc->ht;
4238 }
4239
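/* A peer (duplicate) flow is needed when the two eswitches are paired
 * (SR-IOV LAG, multipath or multiport eswitch) and the rule could be hit on
 * either device, e.g. VF-rep ingress rules or rules that encapsulate.
 */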
4240 static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
4241 {
4242 struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
4243 struct mlx5_flow_attr *attr = flow->attr;
4244 bool is_rep_ingress = esw_attr->in_rep->vport != MLX5_VPORT_UPLINK &&
4245 flow_flag_test(flow, INGRESS);
4246 bool act_is_encap = !!(attr->action &
4247 MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
4248 bool esw_paired = mlx5_devcom_comp_is_ready(esw_attr->in_mdev->priv.eswitch->devcom);
4249
4250 if (!esw_paired)
4251 return false;
4252
4253 if ((mlx5_lag_is_sriov(esw_attr->in_mdev) ||
4254 mlx5_lag_is_multipath(esw_attr->in_mdev)) &&
4255 (is_rep_ingress || act_is_encap))
4256 return true;
4257
4258 if (mlx5_lag_is_mpesw(esw_attr->in_mdev))
4259 return true;
4260
4261 return false;
4262 }
4263
4264 struct mlx5_flow_attr *
4265 mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type)
4266 {
4267 u32 ex_attr_size = (type == MLX5_FLOW_NAMESPACE_FDB) ?
4268 sizeof(struct mlx5_esw_flow_attr) :
4269 sizeof(struct mlx5_nic_flow_attr);
4270 struct mlx5_flow_attr *attr;
4271
4272 attr = kzalloc(sizeof(*attr) + ex_attr_size, GFP_KERNEL);
4273 if (!attr)
4274 return attr;
4275
4276 INIT_LIST_HEAD(&attr->list);
4277 return attr;
4278 }
4279
4280 static void
4281 mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr)
4282 {
4283 struct mlx5_core_dev *counter_dev = get_flow_counter_dev(flow);
4284 struct mlx5_esw_flow_attr *esw_attr;
4285
4286 if (!attr)
4287 return;
4288
4289 if (attr->post_act_handle)
4290 mlx5e_tc_post_act_del(get_post_action(flow->priv), attr->post_act_handle);
4291
4292 mlx5e_tc_tun_encap_dests_unset(flow->priv, flow, attr);
4293
4294 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
4295 mlx5_fc_destroy(counter_dev, attr->counter);
4296
4297 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
4298 mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
4299 mlx5e_tc_detach_mod_hdr(flow->priv, flow, attr);
4300 }
4301
4302 if (mlx5e_is_eswitch_flow(flow)) {
4303 esw_attr = attr->esw_attr;
4304
4305 if (esw_attr->int_port)
4306 mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(flow->priv),
4307 esw_attr->int_port);
4308
4309 if (esw_attr->dest_int_port)
4310 mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(flow->priv),
4311 esw_attr->dest_int_port);
4312 }
4313
4314 mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr);
4315
4316 free_branch_attr(flow, attr->branch_true);
4317 free_branch_attr(flow, attr->branch_false);
4318 }
4319
4320 static int
4321 mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
4322 struct flow_cls_offload *f, unsigned long flow_flags,
4323 struct mlx5e_tc_flow_parse_attr **__parse_attr,
4324 struct mlx5e_tc_flow **__flow)
4325 {
4326 struct mlx5e_tc_flow_parse_attr *parse_attr;
4327 struct mlx5_flow_attr *attr;
4328 struct mlx5e_tc_flow *flow;
4329 int err = -ENOMEM;
4330 int out_index;
4331
4332 flow = kzalloc(sizeof(*flow), GFP_KERNEL);
4333 parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
4334 if (!parse_attr || !flow)
4335 goto err_free;
4336
4337 flow->flags = flow_flags;
4338 flow->cookie = f->cookie;
4339 flow->priv = priv;
4340
4341 attr = mlx5_alloc_flow_attr(mlx5e_get_flow_namespace(flow));
4342 if (!attr)
4343 goto err_free;
4344
4345 flow->attr = attr;
4346
4347 for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
4348 INIT_LIST_HEAD(&flow->encaps[out_index].list);
4349 INIT_LIST_HEAD(&flow->hairpin);
4350 INIT_LIST_HEAD(&flow->l3_to_l2_reformat);
4351 INIT_LIST_HEAD(&flow->attrs);
4352 INIT_LIST_HEAD(&flow->peer_flows);
4353 refcount_set(&flow->refcnt, 1);
4354 init_completion(&flow->init_done);
4355 init_completion(&flow->del_hw_done);
4356
4357 *__flow = flow;
4358 *__parse_attr = parse_attr;
4359
4360 return 0;
4361
4362 err_free:
4363 kfree(flow);
4364 kvfree(parse_attr);
4365 return err;
4366 }
4367
4368 static void
4369 mlx5e_flow_attr_init(struct mlx5_flow_attr *attr,
4370 struct mlx5e_tc_flow_parse_attr *parse_attr,
4371 struct flow_cls_offload *f)
4372 {
4373 attr->parse_attr = parse_attr;
4374 attr->chain = f->common.chain_index;
4375 attr->prio = f->common.prio;
4376 }
4377
4378 static void
4379 mlx5e_flow_esw_attr_init(struct mlx5_flow_attr *attr,
4380 struct mlx5e_priv *priv,
4381 struct mlx5e_tc_flow_parse_attr *parse_attr,
4382 struct flow_cls_offload *f,
4383 struct mlx5_eswitch_rep *in_rep,
4384 struct mlx5_core_dev *in_mdev)
4385 {
4386 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4387 struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
4388
4389 mlx5e_flow_attr_init(attr, parse_attr, f);
4390
4391 esw_attr->in_rep = in_rep;
4392 esw_attr->in_mdev = in_mdev;
4393
4394 if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) ==
4395 MLX5_COUNTER_SOURCE_ESWITCH)
4396 esw_attr->counter_dev = in_mdev;
4397 else
4398 esw_attr->counter_dev = priv->mdev;
4399 }
4400
4401 static struct mlx5e_tc_flow *
4402 __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
4403 struct flow_cls_offload *f,
4404 unsigned long flow_flags,
4405 struct net_device *filter_dev,
4406 struct mlx5_eswitch_rep *in_rep,
4407 struct mlx5_core_dev *in_mdev)
4408 {
4409 struct flow_rule *rule = flow_cls_offload_flow_rule(f);
4410 struct netlink_ext_ack *extack = f->common.extack;
4411 struct mlx5e_tc_flow_parse_attr *parse_attr;
4412 struct mlx5e_tc_flow *flow;
4413 int attr_size, err;
4414
4415 flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
4416 attr_size = sizeof(struct mlx5_esw_flow_attr);
4417 err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
4418 &parse_attr, &flow);
4419 if (err)
4420 goto out;
4421
4422 parse_attr->filter_dev = filter_dev;
4423 mlx5e_flow_esw_attr_init(flow->attr,
4424 priv, parse_attr,
4425 f, in_rep, in_mdev);
4426
4427 err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
4428 f, filter_dev);
4429 if (err)
4430 goto err_free;
4431
4432 /* actions validation depends on parsing the ct matches first */
4433 err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
4434 &flow->attr->ct_attr, extack);
4435 if (err)
4436 goto err_free;
4437
4438 err = parse_tc_fdb_actions(priv, &rule->action, flow, extack);
4439 if (err)
4440 goto err_free;
4441
4442 err = mlx5e_tc_add_fdb_flow(priv, flow, extack);
4443 complete_all(&flow->init_done);
4444 if (err) {
4445 if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev)))
4446 goto err_free;
4447
4448 add_unready_flow(flow);
4449 }
4450
4451 return flow;
4452
4453 err_free:
4454 mlx5e_flow_put(priv, flow);
4455 out:
4456 return ERR_PTR(err);
4457 }
4458
4459 static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
4460 struct mlx5e_tc_flow *flow,
4461 unsigned long flow_flags,
4462 struct mlx5_eswitch *peer_esw)
4463 {
4464 struct mlx5e_priv *priv = flow->priv, *peer_priv;
4465 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4466 struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
4467 struct mlx5e_tc_flow_parse_attr *parse_attr;
4468 int i = mlx5_get_dev_index(peer_esw->dev);
4469 struct mlx5e_rep_priv *peer_urpriv;
4470 struct mlx5e_tc_flow *peer_flow;
4471 struct mlx5_core_dev *in_mdev;
4472 int err = 0;
4473
4474 peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
4475 peer_priv = netdev_priv(peer_urpriv->netdev);
4476
4477 /* in_mdev is the mdev the packet originated from.
4478 * Packets redirected to the uplink use the mdev of the
4479 * original flow, and packets redirected from the uplink use the
4480 * peer mdev.
4481 * Multiport eswitch is a special case in which the original
4482 * mdev must be kept.
4483 */
4484 if (attr->in_rep->vport == MLX5_VPORT_UPLINK && !mlx5_lag_is_mpesw(priv->mdev))
4485 in_mdev = peer_priv->mdev;
4486 else
4487 in_mdev = priv->mdev;
4488
4489 parse_attr = flow->attr->parse_attr;
4490 peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags,
4491 parse_attr->filter_dev,
4492 attr->in_rep, in_mdev);
4493 if (IS_ERR(peer_flow)) {
4494 err = PTR_ERR(peer_flow);
4495 goto out;
4496 }
4497
4498 list_add_tail(&peer_flow->peer_flows, &flow->peer_flows);
4499 flow_flag_set(flow, DUP);
4500 mutex_lock(&esw->offloads.peer_mutex);
4501 list_add_tail(&flow->peer[i], &esw->offloads.peer_flows[i]);
4502 mutex_unlock(&esw->offloads.peer_mutex);
4503
4504 out:
4505 return err;
4506 }
4507
4508 static int
4509 mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
4510 struct flow_cls_offload *f,
4511 unsigned long flow_flags,
4512 struct net_device *filter_dev,
4513 struct mlx5e_tc_flow **__flow)
4514 {
4515 struct mlx5_devcom_comp_dev *devcom = priv->mdev->priv.eswitch->devcom, *pos;
4516 struct mlx5e_rep_priv *rpriv = priv->ppriv;
4517 struct mlx5_eswitch_rep *in_rep = rpriv->rep;
4518 struct mlx5_core_dev *in_mdev = priv->mdev;
4519 struct mlx5_eswitch *peer_esw;
4520 struct mlx5e_tc_flow *flow;
4521 int err;
4522
4523 flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
4524 in_mdev);
4525 if (IS_ERR(flow))
4526 return PTR_ERR(flow);
4527
4528 if (!is_peer_flow_needed(flow)) {
4529 *__flow = flow;
4530 return 0;
4531 }
4532
4533 if (!mlx5_devcom_for_each_peer_begin(devcom)) {
4534 err = -ENODEV;
4535 goto clean_flow;
4536 }
4537
4538 mlx5_devcom_for_each_peer_entry(devcom, peer_esw, pos) {
4539 err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags, peer_esw);
4540 if (err)
4541 goto peer_clean;
4542 }
4543
4544 mlx5_devcom_for_each_peer_end(devcom);
4545
4546 *__flow = flow;
4547 return 0;
4548
4549 peer_clean:
4550 mlx5e_tc_del_fdb_peers_flow(flow);
4551 mlx5_devcom_for_each_peer_end(devcom);
4552 clean_flow:
4553 mlx5e_tc_del_fdb_flow(priv, flow);
4554 return err;
4555 }
4556
4557 static int
4558 mlx5e_add_nic_flow(struct mlx5e_priv *priv,
4559 struct flow_cls_offload *f,
4560 unsigned long flow_flags,
4561 struct net_device *filter_dev,
4562 struct mlx5e_tc_flow **__flow)
4563 {
4564 struct flow_rule *rule = flow_cls_offload_flow_rule(f);
4565 struct netlink_ext_ack *extack = f->common.extack;
4566 struct mlx5e_tc_flow_parse_attr *parse_attr;
4567 struct mlx5e_tc_flow *flow;
4568 int attr_size, err;
4569
4570 if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
4571 if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
4572 return -EOPNOTSUPP;
4573 } else if (!tc_can_offload_extack(priv->netdev, f->common.extack)) {
4574 return -EOPNOTSUPP;
4575 }
4576
4577 flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
4578 attr_size = sizeof(struct mlx5_nic_flow_attr);
4579 err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
4580 &parse_attr, &flow);
4581 if (err)
4582 goto out;
4583
4584 parse_attr->filter_dev = filter_dev;
4585 mlx5e_flow_attr_init(flow->attr, parse_attr, f);
4586
4587 err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
4588 f, filter_dev);
4589 if (err)
4590 goto err_free;
4591
4592 err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
4593 &flow->attr->ct_attr, extack);
4594 if (err)
4595 goto err_free;
4596
4597 err = parse_tc_nic_actions(priv, &rule->action, flow, extack);
4598 if (err)
4599 goto err_free;
4600
4601 err = mlx5e_tc_add_nic_flow(priv, flow, extack);
4602 if (err)
4603 goto err_free;
4604
4605 flow_flag_set(flow, OFFLOADED);
4606 *__flow = flow;
4607
4608 return 0;
4609
4610 err_free:
4611 flow_flag_set(flow, FAILED);
4612 mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
4613 mlx5e_flow_put(priv, flow);
4614 out:
4615 return err;
4616 }
4617
4618 static int
4619 mlx5e_tc_add_flow(struct mlx5e_priv *priv,
4620 struct flow_cls_offload *f,
4621 unsigned long flags,
4622 struct net_device *filter_dev,
4623 struct mlx5e_tc_flow **flow)
4624 {
4625 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4626 unsigned long flow_flags;
4627 int err;
4628
4629 get_flags(flags, &flow_flags);
4630
4631 if (!tc_can_offload_extack(priv->netdev, f->common.extack))
4632 return -EOPNOTSUPP;
4633
4634 if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
4635 err = mlx5e_add_fdb_flow(priv, f, flow_flags,
4636 filter_dev, flow);
4637 else
4638 err = mlx5e_add_nic_flow(priv, f, flow_flags,
4639 filter_dev, flow);
4640
4641 return err;
4642 }
4643
4644 static bool is_flow_rule_duplicate_allowed(struct net_device *dev,
4645 struct mlx5e_rep_priv *rpriv)
4646 {
4647 /* An offloaded flow rule is allowed to be duplicated on a non-uplink
4648 * representor that shares a tc block with other slaves of a lag device.
4649 * rpriv can be NULL if this function is called from NIC mode.
4650 */
4651 return netif_is_lag_port(dev) && rpriv && rpriv->rep->vport != MLX5_VPORT_UPLINK;
4652 }
4653
4654 /* Since the ordering of IPsec and TC is not the same in software and in hardware
4655 * offload, either IPsec offload or TC offload, not both, is allowed for a specific interface.
4656 */
4657 static bool is_tc_ipsec_order_check_needed(struct net_device *filter, struct mlx5e_priv *priv)
4658 {
4659 if (!IS_ENABLED(CONFIG_MLX5_EN_IPSEC))
4660 return false;
4661
4662 if (filter != priv->netdev)
4663 return false;
4664
4665 if (mlx5e_eswitch_vf_rep(priv->netdev))
4666 return false;
4667
4668 return true;
4669 }
4670
4671 static int mlx5e_tc_block_ipsec_offload(struct net_device *filter, struct mlx5e_priv *priv)
4672 {
4673 struct mlx5_core_dev *mdev = priv->mdev;
4674
4675 if (!is_tc_ipsec_order_check_needed(filter, priv))
4676 return 0;
4677
4678 if (mdev->num_block_tc)
4679 return -EBUSY;
4680
4681 mdev->num_block_ipsec++;
4682
4683 return 0;
4684 }
4685
4686 static void mlx5e_tc_unblock_ipsec_offload(struct net_device *filter, struct mlx5e_priv *priv)
4687 {
4688 if (!is_tc_ipsec_order_check_needed(filter, priv))
4689 return;
4690
4691 priv->mdev->num_block_ipsec--;
4692 }
4693
4694 int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
4695 struct flow_cls_offload *f, unsigned long flags)
4696 {
4697 struct netlink_ext_ack *extack = f->common.extack;
4698 struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4699 struct mlx5e_rep_priv *rpriv = priv->ppriv;
4700 struct mlx5e_tc_flow *flow;
4701 int err = 0;
4702
4703 if (!mlx5_esw_hold(priv->mdev))
4704 return -EBUSY;
4705
4706 err = mlx5e_tc_block_ipsec_offload(dev, priv);
4707 if (err)
4708 goto esw_release;
4709
4710 mlx5_esw_get(priv->mdev);
4711
4712 rcu_read_lock();
4713 flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
4714 if (flow) {
4715 /* Same flow rule offloaded to non-uplink representor sharing tc block,
4716 * just return 0.
4717 */
4718 if (is_flow_rule_duplicate_allowed(dev, rpriv) && flow->orig_dev != dev)
4719 goto rcu_unlock;
4720
4721 NL_SET_ERR_MSG_MOD(extack,
4722 "flow cookie already exists, ignoring");
4723 netdev_warn_once(priv->netdev,
4724 "flow cookie %lx already exists, ignoring\n",
4725 f->cookie);
4726 err = -EEXIST;
4727 goto rcu_unlock;
4728 }
4729 rcu_unlock:
4730 rcu_read_unlock();
4731 if (flow)
4732 goto out;
4733
4734 trace_mlx5e_configure_flower(f);
4735 err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow);
4736 if (err)
4737 goto out;
4738
4739 /* Flow rule offloaded to non-uplink representor sharing tc block,
4740 * set the flow's owner dev.
4741 */
4742 if (is_flow_rule_duplicate_allowed(dev, rpriv))
4743 flow->orig_dev = dev;
4744
4745 err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params);
4746 if (err)
4747 goto err_free;
4748
4749 mlx5_esw_release(priv->mdev);
4750 return 0;
4751
4752 err_free:
4753 mlx5e_flow_put(priv, flow);
4754 out:
4755 mlx5e_tc_unblock_ipsec_offload(dev, priv);
4756 mlx5_esw_put(priv->mdev);
4757 esw_release:
4758 mlx5_esw_release(priv->mdev);
4759 return err;
4760 }
4761
4762 static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
4763 {
4764 bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS));
4765 bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS));
4766
4767 return flow_flag_test(flow, INGRESS) == dir_ingress &&
4768 flow_flag_test(flow, EGRESS) == dir_egress;
4769 }
4770
4771 int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
4772 struct flow_cls_offload *f, unsigned long flags)
4773 {
4774 struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4775 struct mlx5e_tc_flow *flow;
4776 int err;
4777
4778 rcu_read_lock();
4779 flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
4780 if (!flow || !same_flow_direction(flow, flags)) {
4781 err = -EINVAL;
4782 goto errout;
4783 }
4784
4785 /* Only delete the flow if it doesn't have MLX5E_TC_FLOW_DELETED flag
4786 * set.
4787 */
4788 if (flow_flag_test_and_set(flow, DELETED)) {
4789 err = -EINVAL;
4790 goto errout;
4791 }
4792 rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
4793 rcu_read_unlock();
4794
4795 trace_mlx5e_delete_flower(f);
4796 mlx5e_flow_put(priv, flow);
4797
4798 mlx5e_tc_unblock_ipsec_offload(dev, priv);
4799 mlx5_esw_put(priv->mdev);
4800 return 0;
4801
4802 errout:
4803 rcu_read_unlock();
4804 return err;
4805 }
4806
4807 int mlx5e_tc_fill_action_stats(struct mlx5e_priv *priv,
4808 struct flow_offload_action *fl_act)
4809 {
4810 return mlx5e_tc_act_stats_fill_stats(get_act_stats_handle(priv), fl_act);
4811 }
4812
4813 int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
4814 struct flow_cls_offload *f, unsigned long flags)
4815 {
4816 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4817 struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4818 struct mlx5e_tc_flow *flow;
4819 struct mlx5_fc *counter;
4820 u64 lastuse = 0;
4821 u64 packets = 0;
4822 u64 bytes = 0;
4823 int err = 0;
4824
4825 rcu_read_lock();
4826 flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie,
4827 tc_ht_params));
4828 rcu_read_unlock();
4829 if (IS_ERR(flow))
4830 return PTR_ERR(flow);
4831
4832 if (!same_flow_direction(flow, flags)) {
4833 err = -EINVAL;
4834 goto errout;
4835 }
4836
4837 if (mlx5e_is_offloaded_flow(flow)) {
4838 if (flow_flag_test(flow, USE_ACT_STATS)) {
4839 f->use_act_stats = true;
4840 } else {
4841 counter = mlx5e_tc_get_counter(flow);
4842 if (!counter)
4843 goto errout;
4844
4845 mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
4846 }
4847 }
4848
4849 /* Under multipath it's possible for one rule to be currently
4850 * un-offloaded while the other rule is offloaded.
4851 */
4852 if (esw && !mlx5_devcom_for_each_peer_begin(esw->devcom))
4853 goto out;
4854
4855 if (flow_flag_test(flow, DUP)) {
4856 struct mlx5e_tc_flow *peer_flow;
4857
4858 list_for_each_entry(peer_flow, &flow->peer_flows, peer_flows) {
4859 u64 packets2;
4860 u64 lastuse2;
4861 u64 bytes2;
4862
4863 if (!flow_flag_test(peer_flow, OFFLOADED))
4864 continue;
4865 if (flow_flag_test(flow, USE_ACT_STATS)) {
4866 f->use_act_stats = true;
4867 break;
4868 }
4869
4870 counter = mlx5e_tc_get_counter(peer_flow);
4871 if (!counter)
4872 goto no_peer_counter;
4873 mlx5_fc_query_cached(counter, &bytes2, &packets2,
4874 &lastuse2);
4875
4876 bytes += bytes2;
4877 packets += packets2;
4878 lastuse = max_t(u64, lastuse, lastuse2);
4879 }
4880 }
4881
4882 no_peer_counter:
4883 if (esw)
4884 mlx5_devcom_for_each_peer_end(esw->devcom);
4885 out:
4886 flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
4887 FLOW_ACTION_HW_STATS_DELAYED);
4888 trace_mlx5e_stats_flower(f);
4889 errout:
4890 mlx5e_flow_put(priv, flow);
4891 return err;
4892 }
4893
4894 static int apply_police_params(struct mlx5e_priv *priv, u64 rate,
4895 struct netlink_ext_ack *extack)
4896 {
4897 struct mlx5e_rep_priv *rpriv = priv->ppriv;
4898 struct mlx5_eswitch *esw;
4899 u32 rate_mbps = 0;
4900 u16 vport_num;
4901 int err;
4902
4903 vport_num = rpriv->rep->vport;
4904 if (vport_num >= MLX5_VPORT_ECPF) {
4905 NL_SET_ERR_MSG_MOD(extack,
4906 "Ingress rate limit is supported only for Eswitch ports connected to VFs");
4907 return -EOPNOTSUPP;
4908 }
4909
4910 esw = priv->mdev->priv.eswitch;
4911 /* rate is given in bytes/sec.
4912 * First convert to bits/sec and then round to the nearest Mbit/sec
4913 * (Mbit = million bits).
4914 * Moreover, if rate is non-zero, configure a minimum of
4915 * 1 Mbit/sec.
4916 */
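/* e.g. rate = 1,250,000 bytes/sec -> 10,000,000 bits/sec -> 10 Mbit/sec */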
4917 if (rate) {
4918 rate = (rate * BITS_PER_BYTE) + 500000;
4919 do_div(rate, 1000000);
4920 rate_mbps = max_t(u32, rate, 1);
4921 }
4922
4923 err = mlx5_esw_qos_modify_vport_rate(esw, vport_num, rate_mbps);
4924 if (err)
4925 NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware");
4926
4927 return err;
4928 }
4929
4930 static int
4931 tc_matchall_police_validate(const struct flow_action *action,
4932 const struct flow_action_entry *act,
4933 struct netlink_ext_ack *extack)
4934 {
4935 if (act->police.notexceed.act_id != FLOW_ACTION_CONTINUE) {
4936 NL_SET_ERR_MSG_MOD(extack,
4937 "Offload not supported when conform action is not continue");
4938 return -EOPNOTSUPP;
4939 }
4940
4941 if (act->police.exceed.act_id != FLOW_ACTION_DROP) {
4942 NL_SET_ERR_MSG_MOD(extack,
4943 "Offload not supported when exceed action is not drop");
4944 return -EOPNOTSUPP;
4945 }
4946
4947 if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT &&
4948 !flow_action_is_last_entry(action, act)) {
4949 NL_SET_ERR_MSG_MOD(extack,
4950 "Offload not supported when conform action is ok, but action is not last");
4951 return -EOPNOTSUPP;
4952 }
4953
4954 if (act->police.peakrate_bytes_ps ||
4955 act->police.avrate || act->police.overhead) {
4956 NL_SET_ERR_MSG_MOD(extack,
4957 "Offload not supported when peakrate/avrate/overhead is configured");
4958 return -EOPNOTSUPP;
4959 }
4960
4961 return 0;
4962 }
4963
4964 static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
4965 struct flow_action *flow_action,
4966 struct netlink_ext_ack *extack)
4967 {
4968 struct mlx5e_rep_priv *rpriv = priv->ppriv;
4969 const struct flow_action_entry *act;
4970 int err;
4971 int i;
4972
4973 if (!flow_action_has_entries(flow_action)) {
4974 NL_SET_ERR_MSG_MOD(extack, "matchall called with no action");
4975 return -EINVAL;
4976 }
4977
4978 if (!flow_offload_has_one_action(flow_action)) {
4979 NL_SET_ERR_MSG_MOD(extack, "matchall policing support only a single action");
4980 return -EOPNOTSUPP;
4981 }
4982
4983 if (!flow_action_basic_hw_stats_check(flow_action, extack)) {
4984 NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported");
4985 return -EOPNOTSUPP;
4986 }
4987
4988 flow_action_for_each(i, act, flow_action) {
4989 switch (act->id) {
4990 case FLOW_ACTION_POLICE:
4991 err = tc_matchall_police_validate(flow_action, act, extack);
4992 if (err)
4993 return err;
4994
4995 err = apply_police_params(priv, act->police.rate_bytes_ps, extack);
4996 if (err)
4997 return err;
4998
4999 mlx5e_stats_copy_rep_stats(&rpriv->prev_vf_vport_stats,
5000 &priv->stats.rep_stats);
5001 break;
5002 default:
5003 NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall");
5004 return -EOPNOTSUPP;
5005 }
5006 }
5007
5008 return 0;
5009 }
5010
5011 int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
5012 struct tc_cls_matchall_offload *ma)
5013 {
5014 struct netlink_ext_ack *extack = ma->common.extack;
5015
5016 if (ma->common.prio != 1) {
5017 NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported");
5018 return -EINVAL;
5019 }
5020
5021 return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack);
5022 }
5023
5024 int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
5025 struct tc_cls_matchall_offload *ma)
5026 {
5027 struct netlink_ext_ack *extack = ma->common.extack;
5028
5029 return apply_police_params(priv, 0, extack);
5030 }
5031
5032 void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
5033 struct tc_cls_matchall_offload *ma)
5034 {
5035 struct mlx5e_rep_priv *rpriv = priv->ppriv;
5036 struct rtnl_link_stats64 cur_stats;
5037 u64 dbytes;
5038 u64 dpkts;
5039
5040 mlx5e_stats_copy_rep_stats(&cur_stats, &priv->stats.rep_stats);
5041 dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets;
5042 dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes;
5043 rpriv->prev_vf_vport_stats = cur_stats;
5044 flow_stats_update(&ma->stats, dbytes, dpkts, 0, jiffies,
5045 FLOW_ACTION_HW_STATS_DELAYED);
5046 }
5047
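/* A peer device that participated in hairpin flows is going away: walk the
 * hairpin table and clear the dead-peer state on every hardware pair whose
 * peer vhca_id matches the departing device.
 */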
5048 static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
5049 struct mlx5e_priv *peer_priv)
5050 {
5051 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
5052 struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
5053 struct mlx5e_hairpin_entry *hpe, *tmp;
5054 LIST_HEAD(init_wait_list);
5055 u16 peer_vhca_id;
5056 int bkt;
5057
5058 if (!mlx5e_same_hw_devs(priv, peer_priv))
5059 return;
5060
5061 peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
5062
5063 mutex_lock(&tc->hairpin_tbl_lock);
5064 hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist)
5065 if (refcount_inc_not_zero(&hpe->refcnt))
5066 list_add(&hpe->dead_peer_wait_list, &init_wait_list);
5067 mutex_unlock(&tc->hairpin_tbl_lock);
5068
5069 list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
5070 wait_for_completion(&hpe->res_ready);
5071 if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
5072 mlx5_core_hairpin_clear_dead_peer(hpe->hp->pair);
5073
5074 mlx5e_hairpin_put(priv, hpe);
5075 }
5076 }
5077
5078 static int mlx5e_tc_netdev_event(struct notifier_block *this,
5079 unsigned long event, void *ptr)
5080 {
5081 struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
5082 struct mlx5e_priv *peer_priv;
5083 struct mlx5e_tc_table *tc;
5084 struct mlx5e_priv *priv;
5085
5086 if (ndev->netdev_ops != &mlx5e_netdev_ops ||
5087 event != NETDEV_UNREGISTER ||
5088 ndev->reg_state == NETREG_REGISTERED)
5089 return NOTIFY_DONE;
5090
5091 tc = container_of(this, struct mlx5e_tc_table, netdevice_nb);
5092 priv = tc->priv;
5093 peer_priv = netdev_priv(ndev);
5094 if (priv == peer_priv ||
5095 !(priv->netdev->features & NETIF_F_HW_TC))
5096 return NOTIFY_DONE;
5097
5098 mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv);
5099
5100 return NOTIFY_DONE;
5101 }
5102
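/* The NIC TC miss table is a single-FTE table in the kernel RX namespace;
 * it is wired up as the default destination of the fs_chains instance
 * created in mlx5e_tc_nic_init() below.
 */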
5103 static int mlx5e_tc_nic_create_miss_table(struct mlx5e_priv *priv)
5104 {
5105 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
5106 struct mlx5_flow_table **ft = &tc->miss_t;
5107 struct mlx5_flow_table_attr ft_attr = {};
5108 struct mlx5_flow_namespace *ns;
5109 int err = 0;
5110
5111 ft_attr.max_fte = 1;
5112 ft_attr.autogroup.max_num_groups = 1;
5113 ft_attr.level = MLX5E_TC_MISS_LEVEL;
5114 ft_attr.prio = 0;
5115 ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL);
5116
5117 *ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
5118 if (IS_ERR(*ft)) {
5119 err = PTR_ERR(*ft);
5120 netdev_err(priv->netdev, "failed to create tc nic miss table err=%d\n", err);
5121 }
5122
5123 return err;
5124 }
5125
5126 static void mlx5e_tc_nic_destroy_miss_table(struct mlx5e_priv *priv)
5127 {
5128 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
5129
5130 mlx5_destroy_flow_table(tc->miss_t);
5131 }
5132
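/* NIC-mode TC init: mod_hdr and hairpin tables, the flow rhashtable, the
 * chain mapping context, the miss table above, the fs_chains instance,
 * post-action and CT support, a per-net netdevice notifier, debugfs and the
 * action-stats handle. Failures unwind in reverse through the goto labels
 * at the end of the function.
 */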
5133 int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
5134 {
5135 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
5136 struct mlx5_core_dev *dev = priv->mdev;
5137 struct mapping_ctx *chains_mapping;
5138 struct mlx5_chains_attr attr = {};
5139 u64 mapping_id;
5140 int err;
5141
5142 mlx5e_mod_hdr_tbl_init(&tc->mod_hdr);
5143 mutex_init(&tc->t_lock);
5144 mutex_init(&tc->hairpin_tbl_lock);
5145 hash_init(tc->hairpin_tbl);
5146 tc->priv = priv;
5147
5148 err = rhashtable_init(&tc->ht, &tc_ht_params);
5149 if (err)
5150 return err;
5151
5152 lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key);
5153 lockdep_init_map(&tc->ht.run_work.lockdep_map, "tc_ht_wq_key", &tc_ht_wq_key, 0);
5154
5155 mapping_id = mlx5_query_nic_system_image_guid(dev);
5156
5157 chains_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN,
5158 sizeof(struct mlx5_mapped_obj),
5159 MLX5E_TC_TABLE_CHAIN_TAG_MASK, true);
5160
5161 if (IS_ERR(chains_mapping)) {
5162 err = PTR_ERR(chains_mapping);
5163 goto err_mapping;
5164 }
5165 tc->mapping = chains_mapping;
5166
5167 err = mlx5e_tc_nic_create_miss_table(priv);
5168 if (err)
5169 goto err_chains;
5170
5171 if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
5172 attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED |
5173 MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
5174 attr.ns = MLX5_FLOW_NAMESPACE_KERNEL;
5175 attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS;
5176 attr.default_ft = tc->miss_t;
5177 attr.mapping = chains_mapping;
5178 attr.fs_base_prio = MLX5E_TC_PRIO;
5179
5180 tc->chains = mlx5_chains_create(dev, &attr);
5181 if (IS_ERR(tc->chains)) {
5182 err = PTR_ERR(tc->chains);
5183 goto err_miss;
5184 }
5185
5186 mlx5_chains_print_info(tc->chains);
5187
5188 tc->post_act = mlx5e_tc_post_act_init(priv, tc->chains, MLX5_FLOW_NAMESPACE_KERNEL);
5189 tc->ct = mlx5_tc_ct_init(priv, tc->chains, &tc->mod_hdr,
5190 MLX5_FLOW_NAMESPACE_KERNEL, tc->post_act);
5191
5192 tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
5193 err = register_netdevice_notifier_dev_net(priv->netdev,
5194 &tc->netdevice_nb,
5195 &tc->netdevice_nn);
5196 if (err) {
5197 tc->netdevice_nb.notifier_call = NULL;
5198 mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n");
5199 goto err_reg;
5200 }
5201
5202 mlx5e_tc_debugfs_init(tc, mlx5e_fs_get_debugfs_root(priv->fs));
5203
5204 tc->action_stats_handle = mlx5e_tc_act_stats_create();
5205 if (IS_ERR(tc->action_stats_handle)) {
5206 err = PTR_ERR(tc->action_stats_handle);
5207 goto err_act_stats;
5208 }
5209
5210 return 0;
5211
5212 err_act_stats:
5213 unregister_netdevice_notifier_dev_net(priv->netdev,
5214 &tc->netdevice_nb,
5215 &tc->netdevice_nn);
5216 err_reg:
5217 mlx5_tc_ct_clean(tc->ct);
5218 mlx5e_tc_post_act_destroy(tc->post_act);
5219 mlx5_chains_destroy(tc->chains);
5220 err_miss:
5221 mlx5e_tc_nic_destroy_miss_table(priv);
5222 err_chains:
5223 mapping_destroy(chains_mapping);
5224 err_mapping:
5225 rhashtable_destroy(&tc->ht);
5226 return err;
5227 }
5228
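/* rhashtable_free_and_destroy() callback: remove the HW rules behind a
 * flow entry and free it.
 */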
5229 static void _mlx5e_tc_del_flow(void *ptr, void *arg)
5230 {
5231 struct mlx5e_tc_flow *flow = ptr;
5232 struct mlx5e_priv *priv = flow->priv;
5233
5234 mlx5e_tc_del_flow(priv, flow);
5235 kfree(flow);
5236 }
5237
5238 void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
5239 {
5240 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
5241
5242 debugfs_remove_recursive(tc->dfs_root);
5243
5244 if (tc->netdevice_nb.notifier_call)
5245 unregister_netdevice_notifier_dev_net(priv->netdev,
5246 &tc->netdevice_nb,
5247 &tc->netdevice_nn);
5248
5249 mlx5e_mod_hdr_tbl_destroy(&tc->mod_hdr);
5250 mutex_destroy(&tc->hairpin_tbl_lock);
5251
5252 rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL);
5253
5254 if (!IS_ERR_OR_NULL(tc->t)) {
5255 mlx5_chains_put_table(tc->chains, 0, 1, MLX5E_TC_FT_LEVEL);
5256 tc->t = NULL;
5257 }
5258 mutex_destroy(&tc->t_lock);
5259
5260 mlx5_tc_ct_clean(tc->ct);
5261 mlx5e_tc_post_act_destroy(tc->post_act);
5262 mapping_destroy(tc->mapping);
5263 mlx5_chains_destroy(tc->chains);
5264 mlx5e_tc_nic_destroy_miss_table(priv);
5265 mlx5e_tc_act_stats_free(tc->action_stats_handle);
5266 }
5267
5268 int mlx5e_tc_ht_init(struct rhashtable *tc_ht)
5269 {
5270 int err;
5271
5272 err = rhashtable_init(tc_ht, &tc_ht_params);
5273 if (err)
5274 return err;
5275
5276 lockdep_set_class(&tc_ht->mutex, &tc_ht_lock_key);
5277 lockdep_init_map(&tc_ht->run_work.lockdep_map, "tc_ht_wq_key", &tc_ht_wq_key, 0);
5278
5279 return 0;
5280 }
5281
5282 void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht)
5283 {
5284 rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
5285 }
5286
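/* Switchdev-mode TC init for the uplink representor: post-action, CT,
 * internal-port and sampling support, the tunnel and tunnel-enc-opts
 * mapping contexts, tunnel encap state, action stats, and devcom pairing
 * keyed by the port parent ID.
 */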
5287 int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv)
5288 {
5289 const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts);
5290 struct netdev_phys_item_id ppid;
5291 struct mlx5e_rep_priv *rpriv;
5292 struct mapping_ctx *mapping;
5293 struct mlx5_eswitch *esw;
5294 struct mlx5e_priv *priv;
5295 u64 mapping_id, key;
5296 int err = 0;
5297
5298 rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
5299 priv = netdev_priv(rpriv->netdev);
5300 esw = priv->mdev->priv.eswitch;
5301
5302 uplink_priv->post_act = mlx5e_tc_post_act_init(priv, esw_chains(esw),
5303 MLX5_FLOW_NAMESPACE_FDB);
5304 uplink_priv->ct_priv = mlx5_tc_ct_init(netdev_priv(priv->netdev),
5305 esw_chains(esw),
5306 &esw->offloads.mod_hdr,
5307 MLX5_FLOW_NAMESPACE_FDB,
5308 uplink_priv->post_act);
5309
5310 uplink_priv->int_port_priv = mlx5e_tc_int_port_init(netdev_priv(priv->netdev));
5311
5312 uplink_priv->tc_psample = mlx5e_tc_sample_init(esw, uplink_priv->post_act);
5313
5314 mapping_id = mlx5_query_nic_system_image_guid(esw->dev);
5315
5316 mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL,
5317 sizeof(struct tunnel_match_key),
5318 TUNNEL_INFO_BITS_MASK, true);
5319
5320 if (IS_ERR(mapping)) {
5321 err = PTR_ERR(mapping);
5322 goto err_tun_mapping;
5323 }
5324 uplink_priv->tunnel_mapping = mapping;
5325
5326 /* Two last values are reserved for stack devices slow path table mark
5327 * and bridge ingress push mark.
5328 */
5329 mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL_ENC_OPTS,
5330 sz_enc_opts, ENC_OPTS_BITS_MASK - 2, true);
5331 if (IS_ERR(mapping)) {
5332 err = PTR_ERR(mapping);
5333 goto err_enc_opts_mapping;
5334 }
5335 uplink_priv->tunnel_enc_opts_mapping = mapping;
5336
5337 uplink_priv->encap = mlx5e_tc_tun_init(priv);
5338 if (IS_ERR(uplink_priv->encap)) {
5339 err = PTR_ERR(uplink_priv->encap);
5340 goto err_register_fib_notifier;
5341 }
5342
5343 uplink_priv->action_stats_handle = mlx5e_tc_act_stats_create();
5344 if (IS_ERR(uplink_priv->action_stats_handle)) {
5345 err = PTR_ERR(uplink_priv->action_stats_handle);
5346 goto err_action_counter;
5347 }
5348
5349 err = dev_get_port_parent_id(priv->netdev, &ppid, false);
5350 if (!err) {
5351 memcpy(&key, &ppid.id, sizeof(key));
5352 mlx5_esw_offloads_devcom_init(esw, key);
5353 }
5354
5355 return 0;
5356
5357 err_action_counter:
5358 mlx5e_tc_tun_cleanup(uplink_priv->encap);
5359 err_register_fib_notifier:
5360 mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
5361 err_enc_opts_mapping:
5362 mapping_destroy(uplink_priv->tunnel_mapping);
5363 err_tun_mapping:
5364 mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
5365 mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv);
5366 mlx5_tc_ct_clean(uplink_priv->ct_priv);
5367 netdev_warn(priv->netdev,
5368 "Failed to initialize tc (eswitch), err: %d", err);
5369 mlx5e_tc_post_act_destroy(uplink_priv->post_act);
5370 return err;
5371 }
5372
5373 void mlx5e_tc_esw_cleanup(struct mlx5_rep_uplink_priv *uplink_priv)
5374 {
5375 struct mlx5e_rep_priv *rpriv;
5376 struct mlx5_eswitch *esw;
5377 struct mlx5e_priv *priv;
5378
5379 rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
5380 priv = netdev_priv(rpriv->netdev);
5381 esw = priv->mdev->priv.eswitch;
5382
5383 mlx5_esw_offloads_devcom_cleanup(esw);
5384
5385 mlx5e_tc_tun_cleanup(uplink_priv->encap);
5386
5387 mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
5388 mapping_destroy(uplink_priv->tunnel_mapping);
5389
5390 mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
5391 mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv);
5392 mlx5_tc_ct_clean(uplink_priv->ct_priv);
5393 mlx5e_flow_meters_cleanup(uplink_priv->flow_meters);
5394 mlx5e_tc_post_act_destroy(uplink_priv->post_act);
5395 mlx5e_tc_act_stats_free(uplink_priv->action_stats_handle);
5396 }
5397
5398 int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
5399 {
5400 struct rhashtable *tc_ht = get_tc_ht(priv, flags);
5401
5402 return atomic_read(&tc_ht->nelems);
5403 }
5404
5405 void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
5406 {
5407 struct mlx5e_tc_flow *flow, *tmp;
5408 int i;
5409
5410 for (i = 0; i < MLX5_MAX_PORTS; i++) {
5411 if (i == mlx5_get_dev_index(esw->dev))
5412 continue;
5413 list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows[i], peer[i])
5414 mlx5e_tc_del_fdb_peers_flow(flow);
5415 }
5416 }
5417
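/* Work item that retries FDB offload for flows parked on the uplink's
 * unready list; any flow that now offloads successfully is dropped from
 * the list.
 */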
5418 void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
5419 {
5420 struct mlx5_rep_uplink_priv *rpriv =
5421 container_of(work, struct mlx5_rep_uplink_priv,
5422 reoffload_flows_work);
5423 struct mlx5e_tc_flow *flow, *tmp;
5424
5425 mutex_lock(&rpriv->unready_flows_lock);
5426 list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) {
5427 if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL))
5428 unready_flow_del(flow);
5429 }
5430 mutex_unlock(&rpriv->unready_flows_lock);
5431 }
5432
5433 static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
5434 struct flow_cls_offload *cls_flower,
5435 unsigned long flags)
5436 {
5437 switch (cls_flower->command) {
5438 case FLOW_CLS_REPLACE:
5439 return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
5440 flags);
5441 case FLOW_CLS_DESTROY:
5442 return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
5443 flags);
5444 case FLOW_CLS_STATS:
5445 return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
5446 flags);
5447 default:
5448 return -EOPNOTSUPP;
5449 }
5450 }
5451
5452 int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
5453 void *cb_priv)
5454 {
5455 unsigned long flags = MLX5_TC_FLAG(INGRESS);
5456 struct mlx5e_priv *priv = cb_priv;
5457
5458 if (!priv->netdev || !netif_device_present(priv->netdev))
5459 return -EOPNOTSUPP;
5460
5461 if (mlx5e_is_uplink_rep(priv))
5462 flags |= MLX5_TC_FLAG(ESW_OFFLOAD);
5463 else
5464 flags |= MLX5_TC_FLAG(NIC_OFFLOAD);
5465
5466 switch (type) {
5467 case TC_SETUP_CLSFLOWER:
5468 return mlx5e_setup_tc_cls_flower(priv, type_data, flags);
5469 default:
5470 return -EOPNOTSUPP;
5471 }
5472 }
5473
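/* Rebuild tunnel metadata for a packet that missed in HW: the tunnel id is
 * split into a tunnel-key mapping id and an enc-opts mapping id, both are
 * looked up, a metadata dst is attached to the skb and the skb is handed to
 * the tunnel device recorded at offload time (saved in tc_priv->fwd_dev so
 * the reference can be dropped after the datapath).
 */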
5474 static bool mlx5e_tc_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb,
5475 struct mlx5e_tc_update_priv *tc_priv,
5476 u32 tunnel_id)
5477 {
5478 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
5479 struct tunnel_match_enc_opts enc_opts = {};
5480 struct mlx5_rep_uplink_priv *uplink_priv;
5481 struct mlx5e_rep_priv *uplink_rpriv;
5482 struct metadata_dst *tun_dst;
5483 struct tunnel_match_key key;
5484 u32 tun_id, enc_opts_id;
5485 struct net_device *dev;
5486 int err;
5487
5488 enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK;
5489 tun_id = tunnel_id >> ENC_OPTS_BITS;
5490
5491 if (!tun_id)
5492 return true;
5493
5494 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
5495 uplink_priv = &uplink_rpriv->uplink_priv;
5496
5497 err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key);
5498 if (err) {
5499 netdev_dbg(priv->netdev,
5500 "Couldn't find tunnel for tun_id: %d, err: %d\n",
5501 tun_id, err);
5502 return false;
5503 }
5504
5505 if (enc_opts_id) {
5506 err = mapping_find(uplink_priv->tunnel_enc_opts_mapping,
5507 enc_opts_id, &enc_opts);
5508 if (err) {
5509 netdev_dbg(priv->netdev,
5510 "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n",
5511 enc_opts_id, err);
5512 return false;
5513 }
5514 }
5515
5516 switch (key.enc_control.addr_type) {
5517 case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
5518 tun_dst = __ip_tun_set_dst(key.enc_ipv4.src, key.enc_ipv4.dst,
5519 key.enc_ip.tos, key.enc_ip.ttl,
5520 key.enc_tp.dst, TUNNEL_KEY,
5521 key32_to_tunnel_id(key.enc_key_id.keyid),
5522 enc_opts.key.len);
5523 break;
5524 case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
5525 tun_dst = __ipv6_tun_set_dst(&key.enc_ipv6.src, &key.enc_ipv6.dst,
5526 key.enc_ip.tos, key.enc_ip.ttl,
5527 key.enc_tp.dst, 0, TUNNEL_KEY,
5528 key32_to_tunnel_id(key.enc_key_id.keyid),
5529 enc_opts.key.len);
5530 break;
5531 default:
5532 netdev_dbg(priv->netdev,
5533 "Couldn't restore tunnel, unsupported addr_type: %d\n",
5534 key.enc_control.addr_type);
5535 return false;
5536 }
5537
5538 if (!tun_dst) {
5539 netdev_dbg(priv->netdev, "Couldn't restore tunnel, no tun_dst\n");
5540 return false;
5541 }
5542
5543 tun_dst->u.tun_info.key.tp_src = key.enc_tp.src;
5544
5545 if (enc_opts.key.len)
5546 ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
5547 enc_opts.key.data,
5548 enc_opts.key.len,
5549 enc_opts.key.dst_opt_type);
5550
5551 skb_dst_set(skb, (struct dst_entry *)tun_dst);
5552 dev = dev_get_by_index(&init_net, key.filter_ifindex);
5553 if (!dev) {
5554 netdev_dbg(priv->netdev,
5555 "Couldn't find tunnel device with ifindex: %d\n",
5556 key.filter_ifindex);
5557 return false;
5558 }
5559
5560 /* Set fwd_dev so we do dev_put() after datapath */
5561 tc_priv->fwd_dev = dev;
5562
5563 skb->dev = dev;
5564
5565 return true;
5566 }
5567
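/* Attach chain or act-miss metadata to the skb through a tc_skb_ext
 * (restoring CT state first), then restore tunnel info when an update_priv
 * context was supplied.
 */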
5568 static bool mlx5e_tc_restore_skb_tc_meta(struct sk_buff *skb, struct mlx5_tc_ct_priv *ct_priv,
5569 struct mlx5_mapped_obj *mapped_obj, u32 zone_restore_id,
5570 u32 tunnel_id, struct mlx5e_tc_update_priv *tc_priv)
5571 {
5572 struct mlx5e_priv *priv = netdev_priv(skb->dev);
5573 struct tc_skb_ext *tc_skb_ext;
5574 u64 act_miss_cookie;
5575 u32 chain;
5576
5577 chain = mapped_obj->type == MLX5_MAPPED_OBJ_CHAIN ? mapped_obj->chain : 0;
5578 act_miss_cookie = mapped_obj->type == MLX5_MAPPED_OBJ_ACT_MISS ?
5579 mapped_obj->act_miss_cookie : 0;
5580 if (chain || act_miss_cookie) {
5581 if (!mlx5e_tc_ct_restore_flow(ct_priv, skb, zone_restore_id))
5582 return false;
5583
5584 tc_skb_ext = tc_skb_ext_alloc(skb);
5585 if (!tc_skb_ext) {
5586 WARN_ON(1);
5587 return false;
5588 }
5589
5590 if (act_miss_cookie) {
5591 tc_skb_ext->act_miss_cookie = act_miss_cookie;
5592 tc_skb_ext->act_miss = 1;
5593 } else {
5594 tc_skb_ext->chain = chain;
5595 }
5596 }
5597
5598 if (tc_priv)
5599 return mlx5e_tc_restore_tunnel(priv, skb, tc_priv, tunnel_id);
5600
5601 return true;
5602 }
5603
5604 static void mlx5e_tc_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *skb,
5605 struct mlx5_mapped_obj *mapped_obj,
5606 struct mlx5e_tc_update_priv *tc_priv)
5607 {
5608 if (!mlx5e_tc_restore_tunnel(priv, skb, tc_priv, mapped_obj->sample.tunnel_id)) {
5609 netdev_dbg(priv->netdev,
5610 "Failed to restore tunnel info for sampled packet\n");
5611 return;
5612 }
5613 mlx5e_tc_sample_skb(skb, mapped_obj);
5614 }
5615
5616 static bool mlx5e_tc_restore_skb_int_port(struct mlx5e_priv *priv, struct sk_buff *skb,
5617 struct mlx5_mapped_obj *mapped_obj,
5618 struct mlx5e_tc_update_priv *tc_priv,
5619 u32 tunnel_id)
5620 {
5621 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
5622 struct mlx5_rep_uplink_priv *uplink_priv;
5623 struct mlx5e_rep_priv *uplink_rpriv;
5624 bool forward_tx = false;
5625
5626 /* Tunnel restore takes precedence over int port restore */
5627 if (tunnel_id)
5628 return mlx5e_tc_restore_tunnel(priv, skb, tc_priv, tunnel_id);
5629
5630 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
5631 uplink_priv = &uplink_rpriv->uplink_priv;
5632
5633 if (mlx5e_tc_int_port_dev_fwd(uplink_priv->int_port_priv, skb,
5634 mapped_obj->int_port_metadata, &forward_tx)) {
5635 /* Set fwd_dev for future dev_put */
5636 tc_priv->fwd_dev = skb->dev;
5637 tc_priv->forward_tx = forward_tx;
5638
5639 return true;
5640 }
5641
5642 return false;
5643 }
5644
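/* Restore TC state for a packet that missed in HW: look up the object
 * behind the mapped id and dispatch on its type (chain / act-miss cookie,
 * sampled packet, or internal-port metadata).
 */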
5645 bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb,
5646 struct mapping_ctx *mapping_ctx, u32 mapped_obj_id,
5647 struct mlx5_tc_ct_priv *ct_priv,
5648 u32 zone_restore_id, u32 tunnel_id,
5649 struct mlx5e_tc_update_priv *tc_priv)
5650 {
5651 struct mlx5e_priv *priv = netdev_priv(skb->dev);
5652 struct mlx5_mapped_obj mapped_obj;
5653 int err;
5654
5655 err = mapping_find(mapping_ctx, mapped_obj_id, &mapped_obj);
5656 if (err) {
5657 netdev_dbg(skb->dev,
5658 "Couldn't find mapped object for mapped_obj_id: %d, err: %d\n",
5659 mapped_obj_id, err);
5660 return false;
5661 }
5662
5663 switch (mapped_obj.type) {
5664 case MLX5_MAPPED_OBJ_CHAIN:
5665 case MLX5_MAPPED_OBJ_ACT_MISS:
5666 return mlx5e_tc_restore_skb_tc_meta(skb, ct_priv, &mapped_obj, zone_restore_id,
5667 tunnel_id, tc_priv);
5668 case MLX5_MAPPED_OBJ_SAMPLE:
5669 mlx5e_tc_restore_skb_sample(priv, skb, &mapped_obj, tc_priv);
5670 tc_priv->skb_done = true;
5671 return true;
5672 case MLX5_MAPPED_OBJ_INT_PORT_METADATA:
5673 return mlx5e_tc_restore_skb_int_port(priv, skb, &mapped_obj, tc_priv, tunnel_id);
5674 default:
5675 netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type);
5676 return false;
5677 }
5678
5679 return false;
5680 }
5681
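/* NIC-mode wrapper: extract the chain tag and zone-restore id from the CQE
 * flow-table metadata and feed them to mlx5e_tc_update_skb() with no tunnel
 * id and no update_priv context.
 */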
5682 bool mlx5e_tc_update_skb_nic(struct mlx5_cqe64 *cqe, struct sk_buff *skb)
5683 {
5684 struct mlx5e_priv *priv = netdev_priv(skb->dev);
5685 u32 mapped_obj_id, reg_b, zone_restore_id;
5686 struct mlx5_tc_ct_priv *ct_priv;
5687 struct mapping_ctx *mapping_ctx;
5688 struct mlx5e_tc_table *tc;
5689
5690 reg_b = be32_to_cpu(cqe->ft_metadata);
5691 tc = mlx5e_fs_get_tc(priv->fs);
5692 mapped_obj_id = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK;
5693 zone_restore_id = (reg_b >> MLX5_REG_MAPPING_MOFFSET(NIC_ZONE_RESTORE_TO_REG)) &
5694 ESW_ZONE_ID_MASK;
5695 ct_priv = tc->ct;
5696 mapping_ctx = tc->mapping;
5697
5698 return mlx5e_tc_update_skb(cqe, skb, mapping_ctx, mapped_obj_id, ct_priv, zone_restore_id,
5699 0, NULL);
5700 }
5701
5702 static struct mapping_ctx *
5703 mlx5e_get_priv_obj_mapping(struct mlx5e_priv *priv)
5704 {
5705 struct mlx5e_tc_table *tc;
5706 struct mlx5_eswitch *esw;
5707 struct mapping_ctx *ctx;
5708
5709 if (is_mdev_switchdev_mode(priv->mdev)) {
5710 esw = priv->mdev->priv.eswitch;
5711 ctx = esw->offloads.reg_c0_obj_pool;
5712 } else {
5713 tc = mlx5e_fs_get_tc(priv->fs);
5714 ctx = tc->mapping;
5715 }
5716
5717 return ctx;
5718 }
5719
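/* Allocate a mapping id for an action-miss cookie; in switchdev mode also
 * install a restore rule so the id can be recovered from reg_c0 on miss.
 */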
5720 int mlx5e_tc_action_miss_mapping_get(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr,
5721 u64 act_miss_cookie, u32 *act_miss_mapping)
5722 {
5723 struct mlx5_mapped_obj mapped_obj = {};
5724 struct mlx5_eswitch *esw;
5725 struct mapping_ctx *ctx;
5726 int err;
5727
5728 ctx = mlx5e_get_priv_obj_mapping(priv);
5729 mapped_obj.type = MLX5_MAPPED_OBJ_ACT_MISS;
5730 mapped_obj.act_miss_cookie = act_miss_cookie;
5731 err = mapping_add(ctx, &mapped_obj, act_miss_mapping);
5732 if (err)
5733 return err;
5734
5735 if (!is_mdev_switchdev_mode(priv->mdev))
5736 return 0;
5737
5738 esw = priv->mdev->priv.eswitch;
5739 attr->act_id_restore_rule = esw_add_restore_rule(esw, *act_miss_mapping);
5740 if (IS_ERR(attr->act_id_restore_rule)) {
5741 err = PTR_ERR(attr->act_id_restore_rule);
5742 goto err_rule;
5743 }
5744
5745 return 0;
5746
5747 err_rule:
5748 mapping_remove(ctx, *act_miss_mapping);
5749 return err;
5750 }
5751
5752 void mlx5e_tc_action_miss_mapping_put(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr,
5753 u32 act_miss_mapping)
5754 {
5755 struct mapping_ctx *ctx = mlx5e_get_priv_obj_mapping(priv);
5756
5757 if (is_mdev_switchdev_mode(priv->mdev))
5758 mlx5_del_flow_rules(attr->act_id_restore_rule);
5759 mapping_remove(ctx, act_miss_mapping);
5760 }
5761