// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021 Mellanox Technologies. */

#include <linux/etherdevice.h>
#include <linux/idr.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/mlx5_ifc.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
#include "mlx5_core.h"
#include "eswitch.h"
#include "en.h"
#include "en_tc.h"
#include "fs_core.h"
#include "esw/indir_table.h"
#include "lib/fs_chains.h"
#include "en/mod_hdr.h"

#define MLX5_ESW_INDIR_TABLE_SIZE 128
#define MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX (MLX5_ESW_INDIR_TABLE_SIZE - 2)
#define MLX5_ESW_INDIR_TABLE_FWD_IDX (MLX5_ESW_INDIR_TABLE_SIZE - 1)
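
/* Layout of an indirection table: entries 0..MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX
 * hold per-tunnel recirculation rules, and the last entry
 * (MLX5_ESW_INDIR_TABLE_FWD_IDX) holds a single catch-all rule that forwards
 * anything that missed the recirculation group to the destination vport.
 */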

struct mlx5_esw_indir_table_rule {
	struct list_head list;
	struct mlx5_flow_handle *handle;
	union {
		__be32 v4;
		struct in6_addr v6;
	} dst_ip;
	u32 vni;
	struct mlx5_modify_hdr *mh;
	refcount_t refcnt;
};

struct mlx5_esw_indir_table_entry {
	struct hlist_node hlist;
	struct mlx5_flow_table *ft;
	struct mlx5_flow_group *recirc_grp;
	struct mlx5_flow_group *fwd_grp;
	struct mlx5_flow_handle *fwd_rule;
	struct list_head recirc_rules;
	int recirc_cnt;
	int fwd_ref;

	u16 vport;
	u8 ip_version;
};

struct mlx5_esw_indir_table {
	struct mutex lock; /* protects the table and its entries */
	DECLARE_HASHTABLE(table, 8);
};

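/* Allocate the per-eswitch registry of indirection table entries, hashed by
 * (vport, ip_version).
 */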
struct mlx5_esw_indir_table *
mlx5_esw_indir_table_init(void)
{
	struct mlx5_esw_indir_table *indir = kvzalloc(sizeof(*indir), GFP_KERNEL);

	if (!indir)
		return ERR_PTR(-ENOMEM);

	mutex_init(&indir->lock);
	hash_init(indir->table);
	return indir;
}

void
mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir)
{
	mutex_destroy(&indir->lock);
	kvfree(indir);
}

bool
mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw,
			    struct mlx5_flow_attr *attr,
			    u16 vport_num,
			    struct mlx5_core_dev *dest_mdev)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;

	/* Use the indirect table for all IP traffic from the uplink to a VF
	 * vport destination on the same eswitch when the source rewrite flag
	 * is set.
	 */
	return esw_attr->in_rep->vport == MLX5_VPORT_UPLINK &&
		mlx5_eswitch_is_vf_vport(esw, vport_num) &&
		esw->dev == dest_mdev &&
		attr->ip_version &&
		attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE;
}

u16
mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr *attr)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;

	return esw_attr->rx_tun_attr ? esw_attr->rx_tun_attr->decap_vport : 0;
}

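/* Find an existing recirculation rule for this tunnel destination IP and VNI.
 * On a hit the rule's refcount is taken; callers release it through
 * mlx5_esw_indir_table_rule_put().
 */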
static struct mlx5_esw_indir_table_rule *
mlx5_esw_indir_table_rule_lookup(struct mlx5_esw_indir_table_entry *e,
				 struct mlx5_esw_flow_attr *attr)
{
	struct mlx5_esw_indir_table_rule *rule;

	list_for_each_entry(rule, &e->recirc_rules, list)
		if (rule->vni == attr->rx_tun_attr->vni &&
		    !memcmp(&rule->dst_ip, &attr->rx_tun_attr->dst_ip,
			    sizeof(attr->rx_tun_attr->dst_ip)))
			goto found;
	return NULL;

found:
	refcount_inc(&rule->refcnt);
	return rule;
}

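/* Install (or take another reference on) the recirculation rule for the
 * tunnel described by esw_attr->rx_tun_attr.  The rule matches the outer
 * destination IP, the VXLAN VNI and the uplink source port metadata in
 * reg_c_0.  It then rewrites the source port metadata to the decap vport,
 * sets the tunnel mapping register to ESW_TUN_SLOW_TABLE_GOTO_VPORT and
 * forwards back to the chain 0 table, so matching restarts as if the packet
 * had been received from the decap vport.
 */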
static int mlx5_esw_indir_table_rule_get(struct mlx5_eswitch *esw,
					 struct mlx5_flow_attr *attr,
					 struct mlx5_flow_spec *spec,
					 struct mlx5_esw_indir_table_entry *e)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
	struct mlx5_fs_chains *chains = esw_chains(esw);
	struct mlx5e_tc_mod_hdr_acts mod_acts = {};
	struct mlx5_flow_destination dest = {};
	struct mlx5_esw_indir_table_rule *rule;
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_spec *rule_spec;
	struct mlx5_flow_handle *handle;
	int err = 0;
	u32 data;

	rule = mlx5_esw_indir_table_rule_lookup(e, esw_attr);
	if (rule)
		return 0;

	if (e->recirc_cnt == MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX)
		return -EINVAL;

	rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
	if (!rule_spec)
		return -ENOMEM;

	rule = kzalloc(sizeof(*rule), GFP_KERNEL);
	if (!rule) {
		err = -ENOMEM;
		goto out;
	}

	rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS |
					   MLX5_MATCH_MISC_PARAMETERS |
					   MLX5_MATCH_MISC_PARAMETERS_2;
	if (MLX5_CAP_FLOWTABLE_NIC_RX(esw->dev, ft_field_support.outer_ip_version)) {
		MLX5_SET(fte_match_param, rule_spec->match_criteria,
			 outer_headers.ip_version, 0xf);
		MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ip_version,
			 attr->ip_version);
	} else if (attr->ip_version) {
		MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
				 outer_headers.ethertype);
		MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ethertype,
			 (attr->ip_version == 4 ? ETH_P_IP : ETH_P_IPV6));
	} else {
		err = -EOPNOTSUPP;
		goto err_ethertype;
	}

	if (attr->ip_version == 4) {
		MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
				 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
		MLX5_SET(fte_match_param, rule_spec->match_value,
			 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
			 ntohl(esw_attr->rx_tun_attr->dst_ip.v4));
	} else if (attr->ip_version == 6) {
		int len = sizeof(struct in6_addr);

		memset(MLX5_ADDR_OF(fte_match_param, rule_spec->match_criteria,
				    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       0xff, len);
		memcpy(MLX5_ADDR_OF(fte_match_param, rule_spec->match_value,
				    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &esw_attr->rx_tun_attr->dst_ip.v6, len);
	}

	MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
			 misc_parameters.vxlan_vni);
	MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters.vxlan_vni,
		 MLX5_GET(fte_match_param, spec->match_value, misc_parameters.vxlan_vni));

	MLX5_SET(fte_match_param, rule_spec->match_criteria,
		 misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask());
	MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0,
		 mlx5_eswitch_get_vport_metadata_for_match(esw_attr->in_mdev->priv.eswitch,
							   MLX5_VPORT_UPLINK));

	/* Rewrite the flow source metadata to the decap vport so the packet
	 * recirculates as if it had been received from that vport.
	 */
	data = mlx5_eswitch_get_vport_metadata_for_set(esw, esw_attr->rx_tun_attr->decap_vport);
	err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
					VPORT_TO_REG, data);
	if (err)
		goto err_mod_hdr_regc0;

	err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
					TUNNEL_TO_REG, ESW_TUN_SLOW_TABLE_GOTO_VPORT);
	if (err)
		goto err_mod_hdr_regc1;

	flow_act.modify_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB,
						       mod_acts.num_actions, mod_acts.actions);
	if (IS_ERR(flow_act.modify_hdr)) {
		err = PTR_ERR(flow_act.modify_hdr);
		goto err_mod_hdr_alloc;
	}

	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
	flow_act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL | FLOW_ACT_NO_APPEND;
	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
	dest.ft = mlx5_chains_get_table(chains, 0, 1, 0);
	if (IS_ERR(dest.ft)) {
		err = PTR_ERR(dest.ft);
		goto err_table;
	}
	handle = mlx5_add_flow_rules(e->ft, rule_spec, &flow_act, &dest, 1);
	if (IS_ERR(handle)) {
		err = PTR_ERR(handle);
		goto err_handle;
	}

	mlx5e_mod_hdr_dealloc(&mod_acts);
	rule->handle = handle;
	rule->vni = esw_attr->rx_tun_attr->vni;
	rule->mh = flow_act.modify_hdr;
	memcpy(&rule->dst_ip, &esw_attr->rx_tun_attr->dst_ip,
	       sizeof(esw_attr->rx_tun_attr->dst_ip));
	refcount_set(&rule->refcnt, 1);
	list_add(&rule->list, &e->recirc_rules);
	e->recirc_cnt++;
	goto out;

err_handle:
	mlx5_chains_put_table(chains, 0, 1, 0);
err_table:
	mlx5_modify_header_dealloc(esw->dev, flow_act.modify_hdr);
err_mod_hdr_alloc:
err_mod_hdr_regc1:
	mlx5e_mod_hdr_dealloc(&mod_acts);
err_mod_hdr_regc0:
err_ethertype:
	kfree(rule);
out:
	kvfree(rule_spec);
	return err;
}

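/* Drop one reference on the recirculation rule matching this tunnel
 * attribute.  The rule, its chain 0 table reference and its modify header
 * are released when the last user is gone.
 */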
static void mlx5_esw_indir_table_rule_put(struct mlx5_eswitch *esw,
					  struct mlx5_flow_attr *attr,
					  struct mlx5_esw_indir_table_entry *e)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
	struct mlx5_fs_chains *chains = esw_chains(esw);
	struct mlx5_esw_indir_table_rule *rule;

	list_for_each_entry(rule, &e->recirc_rules, list)
		if (rule->vni == esw_attr->rx_tun_attr->vni &&
		    !memcmp(&rule->dst_ip, &esw_attr->rx_tun_attr->dst_ip,
			    sizeof(esw_attr->rx_tun_attr->dst_ip)))
			goto found;

	return;

found:
	if (!refcount_dec_and_test(&rule->refcnt))
		return;

	mlx5_del_flow_rules(rule->handle);
	mlx5_chains_put_table(chains, 0, 1, 0);
	mlx5_modify_header_dealloc(esw->dev, rule->mh);
	list_del(&rule->list);
	kfree(rule);
	e->recirc_cnt--;
}

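/* The recirculation group spans entries 0..MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX
 * and matches on the outer IP version (or ethertype on older devices), the
 * outer destination IP, the VXLAN VNI and the source port metadata in
 * reg_c_0.
 */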
static int mlx5_create_indir_recirc_group(struct mlx5_eswitch *esw,
					  struct mlx5_flow_attr *attr,
					  struct mlx5_flow_spec *spec,
					  struct mlx5_esw_indir_table_entry *e)
{
	int err = 0, inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	u32 *in, *match;

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS |
		 MLX5_MATCH_MISC_PARAMETERS | MLX5_MATCH_MISC_PARAMETERS_2);
	match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);

	if (MLX5_CAP_FLOWTABLE_NIC_RX(esw->dev, ft_field_support.outer_ip_version))
		MLX5_SET(fte_match_param, match, outer_headers.ip_version, 0xf);
	else
		MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.ethertype);

	if (attr->ip_version == 4) {
		MLX5_SET_TO_ONES(fte_match_param, match,
				 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
	} else if (attr->ip_version == 6) {
		memset(MLX5_ADDR_OF(fte_match_param, match,
				    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       0xff, sizeof(struct in6_addr));
	} else {
		err = -EOPNOTSUPP;
		goto out;
	}

	MLX5_SET_TO_ONES(fte_match_param, match, misc_parameters.vxlan_vni);
	MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0,
		 mlx5_eswitch_get_vport_metadata_mask());
	MLX5_SET(create_flow_group_in, in, start_flow_index, 0);
	MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX);
	e->recirc_grp = mlx5_create_flow_group(e->ft, in);
	if (IS_ERR(e->recirc_grp)) {
		err = PTR_ERR(e->recirc_grp);
		goto out;
	}

	INIT_LIST_HEAD(&e->recirc_rules);
	e->recirc_cnt = 0;

out:
	kvfree(in);
	return err;
}

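/* The fwd group owns the single last entry of the table.  Its rule carries
 * an empty match, so any packet that does not hit a recirculation rule is
 * simply forwarded to the destination vport.
 */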
static int mlx5_create_indir_fwd_group(struct mlx5_eswitch *esw,
				       struct mlx5_esw_indir_table_entry *e)
{
	int err = 0, inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	struct mlx5_flow_destination dest = {};
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_spec *spec;
	u32 *in;

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec) {
		kvfree(in);
		return -ENOMEM;
	}

	/* The group holds a single catch-all entry at the last flow index */
	MLX5_SET(create_flow_group_in, in, start_flow_index, MLX5_ESW_INDIR_TABLE_FWD_IDX);
	MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_ESW_INDIR_TABLE_FWD_IDX);
	e->fwd_grp = mlx5_create_flow_group(e->ft, in);
	if (IS_ERR(e->fwd_grp)) {
		err = PTR_ERR(e->fwd_grp);
		goto err_out;
	}

	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
	dest.vport.num = e->vport;
	dest.vport.vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id);
	dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID;
	e->fwd_rule = mlx5_add_flow_rules(e->ft, spec, &flow_act, &dest, 1);
	if (IS_ERR(e->fwd_rule)) {
		mlx5_destroy_flow_group(e->fwd_grp);
		err = PTR_ERR(e->fwd_rule);
	}

err_out:
	kvfree(spec);
	kvfree(in);
	return err;
}

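/* Create the indirection table itself: an unmanaged, level 1 flow table in
 * the FDB TC offload priority holding the recirculation group, optionally an
 * initial recirculation rule (when the flow decapsulates) and the catch-all
 * fwd group.  The new entry is hashed under (vport << 16 | ip_version).
 */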
static struct mlx5_esw_indir_table_entry *
mlx5_esw_indir_table_entry_create(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr,
				  struct mlx5_flow_spec *spec, u16 vport, bool decap)
{
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_namespace *root_ns;
	struct mlx5_esw_indir_table_entry *e;
	struct mlx5_flow_table *ft;
	int err = 0;

	root_ns = mlx5_get_flow_namespace(esw->dev, MLX5_FLOW_NAMESPACE_FDB);
	if (!root_ns)
		return ERR_PTR(-ENOENT);

	e = kzalloc(sizeof(*e), GFP_KERNEL);
	if (!e)
		return ERR_PTR(-ENOMEM);

	ft_attr.prio = FDB_TC_OFFLOAD;
	ft_attr.max_fte = MLX5_ESW_INDIR_TABLE_SIZE;
	ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
	ft_attr.level = 1;

	ft = mlx5_create_flow_table(root_ns, &ft_attr);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		goto tbl_err;
	}
	e->ft = ft;
	e->vport = vport;
	e->ip_version = attr->ip_version;
	e->fwd_ref = !decap;

	err = mlx5_create_indir_recirc_group(esw, attr, spec, e);
	if (err)
		goto recirc_grp_err;

	if (decap) {
		err = mlx5_esw_indir_table_rule_get(esw, attr, spec, e);
		if (err)
			goto recirc_rule_err;
	}

	err = mlx5_create_indir_fwd_group(esw, e);
	if (err)
		goto fwd_grp_err;

	hash_add(esw->fdb_table.offloads.indir->table, &e->hlist,
		 vport << 16 | attr->ip_version);

	return e;

fwd_grp_err:
	if (decap)
		mlx5_esw_indir_table_rule_put(esw, attr, e);
recirc_rule_err:
	mlx5_destroy_flow_group(e->recirc_grp);
recirc_grp_err:
	mlx5_destroy_flow_table(e->ft);
tbl_err:
	kfree(e);
	return ERR_PTR(err);
}

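/* Entries are keyed by the destination vport and IP version packed into a
 * single u32.
 */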
static struct mlx5_esw_indir_table_entry *
mlx5_esw_indir_table_entry_lookup(struct mlx5_eswitch *esw, u16 vport, u8 ip_version)
{
	struct mlx5_esw_indir_table_entry *e;
	u32 key = vport << 16 | ip_version;

	hash_for_each_possible(esw->fdb_table.offloads.indir->table, e, hlist, key)
		if (e->vport == vport && e->ip_version == ip_version)
			return e;

	return NULL;
}

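/* Get (and reference) the indirection flow table used to reach @vport for
 * the given IP version, creating it on first use.  With @decap set, a
 * recirculation rule for the flow's tunnel attributes is installed or
 * referenced as well; otherwise only the fwd reference count is bumped.
 */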
struct mlx5_flow_table *mlx5_esw_indir_table_get(struct mlx5_eswitch *esw,
						 struct mlx5_flow_attr *attr,
						 struct mlx5_flow_spec *spec,
						 u16 vport, bool decap)
{
	struct mlx5_esw_indir_table_entry *e;
	int err;

	mutex_lock(&esw->fdb_table.offloads.indir->lock);
	e = mlx5_esw_indir_table_entry_lookup(esw, vport, attr->ip_version);
	if (e) {
		if (!decap) {
			e->fwd_ref++;
		} else {
			err = mlx5_esw_indir_table_rule_get(esw, attr, spec, e);
			if (err)
				goto out_err;
		}
	} else {
		e = mlx5_esw_indir_table_entry_create(esw, attr, spec, vport, decap);
		if (IS_ERR(e)) {
			err = PTR_ERR(e);
			esw_warn(esw->dev, "Failed to create indirection table, err %d.\n", err);
			goto out_err;
		}
	}
	mutex_unlock(&esw->fdb_table.offloads.indir->lock);
	return e->ft;

out_err:
	mutex_unlock(&esw->fdb_table.offloads.indir->lock);
	return ERR_PTR(err);
}

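/* Release a reference taken by mlx5_esw_indir_table_get().  The table is
 * destroyed once it has neither fwd references nor recirculation rules.
 */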
void mlx5_esw_indir_table_put(struct mlx5_eswitch *esw,
			      struct mlx5_flow_attr *attr,
			      u16 vport, bool decap)
{
	struct mlx5_esw_indir_table_entry *e;

	mutex_lock(&esw->fdb_table.offloads.indir->lock);
	e = mlx5_esw_indir_table_entry_lookup(esw, vport, attr->ip_version);
	if (!e)
		goto out;

	if (!decap)
		e->fwd_ref--;
	else
		mlx5_esw_indir_table_rule_put(esw, attr, e);

	if (e->fwd_ref || e->recirc_cnt)
		goto out;

	hash_del(&e->hlist);
	mlx5_destroy_flow_group(e->recirc_grp);
	mlx5_del_flow_rules(e->fwd_rule);
	mlx5_destroy_flow_group(e->fwd_grp);
	mlx5_destroy_flow_table(e->ft);
	kfree(e);
out:
	mutex_unlock(&esw->fdb_table.offloads.indir->lock);
}