1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2021 Mellanox Technologies. */
3 
4 #include <linux/etherdevice.h>
5 #include <linux/idr.h>
6 #include <linux/mlx5/driver.h>
7 #include <linux/mlx5/mlx5_ifc.h>
8 #include <linux/mlx5/vport.h>
9 #include <linux/mlx5/fs.h>
10 #include "mlx5_core.h"
11 #include "eswitch.h"
12 #include "en.h"
13 #include "en_tc.h"
14 #include "fs_core.h"
15 #include "esw/indir_table.h"
16 #include "lib/fs_chains.h"
17 
18 #define MLX5_ESW_INDIR_TABLE_SIZE 128
19 #define MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX (MLX5_ESW_INDIR_TABLE_SIZE - 2)
20 #define MLX5_ESW_INDIR_TABLE_FWD_IDX (MLX5_ESW_INDIR_TABLE_SIZE - 1)
21 
/* One recirculation rule installed in an indirection table.
 * Keyed by tunnel VNI + outer destination IP (see rule_lookup());
 * refcounted so multiple flows sharing the same tunnel reuse a
 * single hardware rule.
 */
struct mlx5_esw_indir_table_rule {
	struct list_head list;		/* entry on mlx5_esw_indir_table_entry::recirc_rules */
	struct mlx5_flow_handle *handle; /* installed steering rule */
	union {
		__be32 v4;
		struct in6_addr v6;
	} dst_ip;			/* outer destination IP, v4 or v6 */
	u32 vni;			/* tunnel VNI this rule matches */
	struct mlx5_modify_hdr *mh;	/* modify header rewriting flow-source metadata */
	refcount_t refcnt;		/* shared-use count; resources freed on last put */
};
33 
/* Per (vport, IP version) indirection table: a dedicated FDB flow table
 * holding recirculation rules in one group plus a catch-all forward rule
 * in a single-entry group at the last index.  Entries are hashed in
 * mlx5_esw_indir_table::table under key (vport << 16 | ip_version).
 */
struct mlx5_esw_indir_table_entry {
	struct hlist_node hlist;
	struct mlx5_flow_table *ft;		/* the indirection flow table itself */
	struct mlx5_flow_group *recirc_grp;	/* indices 0..MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX */
	struct mlx5_flow_group *fwd_grp;	/* single entry at MLX5_ESW_INDIR_TABLE_FWD_IDX */
	struct mlx5_flow_handle *fwd_rule;	/* catch-all forward-to-vport rule */
	struct list_head recirc_rules;		/* list of mlx5_esw_indir_table_rule */
	int recirc_cnt;				/* number of recirc rules currently installed */
	int fwd_ref;				/* references held by non-decap users */

	u16 vport;
	u8 ip_version;
};
47 
/* Top-level container: hashtable of indirection table entries, keyed by
 * (vport << 16 | ip_version).  All lookups and mutations happen under
 * @lock.
 */
struct mlx5_esw_indir_table {
	struct mutex lock; /* protects table */
	DECLARE_HASHTABLE(table, 8);
};
52 
53 struct mlx5_esw_indir_table *
54 mlx5_esw_indir_table_init(void)
55 {
56 	struct mlx5_esw_indir_table *indir = kvzalloc(sizeof(*indir), GFP_KERNEL);
57 
58 	if (!indir)
59 		return ERR_PTR(-ENOMEM);
60 
61 	mutex_init(&indir->lock);
62 	hash_init(indir->table);
63 	return indir;
64 }
65 
66 void
67 mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir)
68 {
69 	mutex_destroy(&indir->lock);
70 	kvfree(indir);
71 }
72 
73 bool
74 mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw,
75 			    struct mlx5_flow_attr *attr,
76 			    u16 vport_num,
77 			    struct mlx5_core_dev *dest_mdev)
78 {
79 	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
80 
81 	/* Use indirect table for all IP traffic from UL to VF with vport
82 	 * destination when source rewrite flag is set.
83 	 */
84 	return esw_attr->in_rep->vport == MLX5_VPORT_UPLINK &&
85 		mlx5_eswitch_is_vf_vport(esw, vport_num) &&
86 		esw->dev == dest_mdev &&
87 		attr->ip_version &&
88 		attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE;
89 }
90 
91 u16
92 mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr *attr)
93 {
94 	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
95 
96 	return esw_attr->rx_tun_attr ? esw_attr->rx_tun_attr->decap_vport : 0;
97 }
98 
99 static struct mlx5_esw_indir_table_rule *
100 mlx5_esw_indir_table_rule_lookup(struct mlx5_esw_indir_table_entry *e,
101 				 struct mlx5_esw_flow_attr *attr)
102 {
103 	struct mlx5_esw_indir_table_rule *rule;
104 
105 	list_for_each_entry(rule, &e->recirc_rules, list)
106 		if (rule->vni == attr->rx_tun_attr->vni &&
107 		    !memcmp(&rule->dst_ip, &attr->rx_tun_attr->dst_ip,
108 			    sizeof(attr->rx_tun_attr->dst_ip)))
109 			goto found;
110 	return NULL;
111 
112 found:
113 	refcount_inc(&rule->refcnt);
114 	return rule;
115 }
116 
/* Get (or create) the recirculation rule for this flow's tunnel in
 * indirection table entry @e.
 *
 * If a rule with matching VNI + outer destination IP already exists,
 * its refcount is taken and 0 is returned.  Otherwise a new rule is
 * built: it matches the outer IP version (or ethertype on older HW),
 * the outer destination IP, the VXLAN VNI copied from @spec, and the
 * uplink source-port metadata; its actions rewrite the flow-source
 * metadata register to the decap vport, mark the packet for slow-table
 * goto-vport handling, and forward to the chain-0/prio-1 table for
 * recirculation.
 *
 * Returns 0 on success or a negative errno.  Assumes the indir table
 * lock is held by the caller (all call paths here take it).
 */
static int mlx5_esw_indir_table_rule_get(struct mlx5_eswitch *esw,
					 struct mlx5_flow_attr *attr,
					 struct mlx5_flow_spec *spec,
					 struct mlx5_esw_indir_table_entry *e)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
	struct mlx5_fs_chains *chains = esw_chains(esw);
	struct mlx5e_tc_mod_hdr_acts mod_acts = {};
	struct mlx5_flow_destination dest = {};
	struct mlx5_esw_indir_table_rule *rule;
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_spec *rule_spec;
	struct mlx5_flow_handle *handle;
	int err = 0;
	u32 data;

	/* Fast path: reuse an existing rule for the same tunnel. */
	rule = mlx5_esw_indir_table_rule_lookup(e, esw_attr);
	if (rule)
		return 0;

	/* Recirc group spans indices 0..RECIRC_IDX_MAX.
	 * NOTE(review): the '==' check caps the rule count at
	 * RECIRC_IDX_MAX even though the index range is inclusive —
	 * presumably intentional headroom; confirm against group sizing.
	 */
	if (e->recirc_cnt == MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX)
		return -EINVAL;

	rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
	if (!rule_spec)
		return -ENOMEM;

	rule = kzalloc(sizeof(*rule), GFP_KERNEL);
	if (!rule) {
		err = -ENOMEM;
		goto out;
	}

	rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS |
					   MLX5_MATCH_MISC_PARAMETERS |
					   MLX5_MATCH_MISC_PARAMETERS_2;
	/* Match the IP version directly when the device supports it,
	 * otherwise fall back to matching the ethertype.
	 */
	if (MLX5_CAP_FLOWTABLE_NIC_RX(esw->dev, ft_field_support.outer_ip_version)) {
		MLX5_SET(fte_match_param, rule_spec->match_criteria,
			 outer_headers.ip_version, 0xf);
		MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ip_version,
			 attr->ip_version);
	} else if (attr->ip_version) {
		MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
				 outer_headers.ethertype);
		MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ethertype,
			 (attr->ip_version == 4 ? ETH_P_IP : ETH_P_IPV6));
	} else {
		err = -EOPNOTSUPP;
		goto err_ethertype;
	}

	/* Match the outer destination IP of the tunnel. */
	if (attr->ip_version == 4) {
		MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
				 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
		MLX5_SET(fte_match_param, rule_spec->match_value,
			 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
			 ntohl(esw_attr->rx_tun_attr->dst_ip.v4));
	} else if (attr->ip_version == 6) {
		int len = sizeof(struct in6_addr);

		memset(MLX5_ADDR_OF(fte_match_param, rule_spec->match_criteria,
				    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       0xff, len);
		memcpy(MLX5_ADDR_OF(fte_match_param, rule_spec->match_value,
				    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &esw_attr->rx_tun_attr->dst_ip.v6, len);
	}

	/* Match the VNI, copied from the original flow's spec. */
	MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
			 misc_parameters.vxlan_vni);
	MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters.vxlan_vni,
		 MLX5_GET(fte_match_param, spec->match_value, misc_parameters.vxlan_vni));

	/* Match packets whose source-port metadata says "uplink". */
	MLX5_SET(fte_match_param, rule_spec->match_criteria,
		 misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask());
	MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0,
		 mlx5_eswitch_get_vport_metadata_for_match(esw_attr->in_mdev->priv.eswitch,
							   MLX5_VPORT_UPLINK));

	/* Modify flow source to recirculate packet */
	data = mlx5_eswitch_get_vport_metadata_for_set(esw, esw_attr->rx_tun_attr->decap_vport);
	err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
					VPORT_TO_REG, data);
	if (err)
		goto err_mod_hdr_regc0;

	err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
					TUNNEL_TO_REG, ESW_TUN_SLOW_TABLE_GOTO_VPORT);
	if (err)
		goto err_mod_hdr_regc1;

	flow_act.modify_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB,
						       mod_acts.num_actions, mod_acts.actions);
	if (IS_ERR(flow_act.modify_hdr)) {
		err = PTR_ERR(flow_act.modify_hdr);
		goto err_mod_hdr_alloc;
	}

	/* Forward to the root TC table (chain 0, prio 1) for recirculation. */
	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
	flow_act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL | FLOW_ACT_NO_APPEND;
	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
	dest.ft = mlx5_chains_get_table(chains, 0, 1, 0);
	if (IS_ERR(dest.ft)) {
		err = PTR_ERR(dest.ft);
		goto err_table;
	}
	handle = mlx5_add_flow_rules(e->ft, rule_spec, &flow_act, &dest, 1);
	if (IS_ERR(handle)) {
		err = PTR_ERR(handle);
		goto err_handle;
	}

	/* Success: the raw action list is no longer needed once the
	 * modify header object has been created and attached.
	 */
	dealloc_mod_hdr_actions(&mod_acts);
	rule->handle = handle;
	rule->vni = esw_attr->rx_tun_attr->vni;
	rule->mh = flow_act.modify_hdr;
	memcpy(&rule->dst_ip, &esw_attr->rx_tun_attr->dst_ip,
	       sizeof(esw_attr->rx_tun_attr->dst_ip));
	refcount_set(&rule->refcnt, 1);
	list_add(&rule->list, &e->recirc_rules);
	e->recirc_cnt++;
	goto out;

	/* Unwind in strict reverse order of acquisition. */
err_handle:
	mlx5_chains_put_table(chains, 0, 1, 0);
err_table:
	mlx5_modify_header_dealloc(esw->dev, flow_act.modify_hdr);
err_mod_hdr_alloc:
err_mod_hdr_regc1:
	dealloc_mod_hdr_actions(&mod_acts);
err_mod_hdr_regc0:
err_ethertype:
	kfree(rule);
out:
	kvfree(rule_spec);
	return err;
}
254 
255 static void mlx5_esw_indir_table_rule_put(struct mlx5_eswitch *esw,
256 					  struct mlx5_flow_attr *attr,
257 					  struct mlx5_esw_indir_table_entry *e)
258 {
259 	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
260 	struct mlx5_fs_chains *chains = esw_chains(esw);
261 	struct mlx5_esw_indir_table_rule *rule;
262 
263 	list_for_each_entry(rule, &e->recirc_rules, list)
264 		if (rule->vni == esw_attr->rx_tun_attr->vni &&
265 		    !memcmp(&rule->dst_ip, &esw_attr->rx_tun_attr->dst_ip,
266 			    sizeof(esw_attr->rx_tun_attr->dst_ip)))
267 			goto found;
268 
269 	return;
270 
271 found:
272 	if (!refcount_dec_and_test(&rule->refcnt))
273 		return;
274 
275 	mlx5_del_flow_rules(rule->handle);
276 	mlx5_chains_put_table(chains, 0, 1, 0);
277 	mlx5_modify_header_dealloc(esw->dev, rule->mh);
278 	list_del(&rule->list);
279 	kfree(rule);
280 	e->recirc_cnt--;
281 }
282 
/* Create the recirculation flow group of entry @e, covering table indices
 * 0..MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX.  The group's match criteria
 * mirror what rule_get() matches on: outer IP version (or ethertype on
 * older devices), outer destination IP, VXLAN VNI and source-port
 * metadata.  Also initializes the entry's rule list and counter.
 *
 * NOTE(review): @spec is currently unused here — presumably kept for
 * signature symmetry with rule_get(); confirm before removing.
 *
 * Returns 0 on success or a negative errno.
 */
static int mlx5_create_indir_recirc_group(struct mlx5_eswitch *esw,
					  struct mlx5_flow_attr *attr,
					  struct mlx5_flow_spec *spec,
					  struct mlx5_esw_indir_table_entry *e)
{
	int err = 0, inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	u32 *in, *match;

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS |
		 MLX5_MATCH_MISC_PARAMETERS | MLX5_MATCH_MISC_PARAMETERS_2);
	match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);

	/* IP version match when supported, ethertype match otherwise. */
	if (MLX5_CAP_FLOWTABLE_NIC_RX(esw->dev, ft_field_support.outer_ip_version))
		MLX5_SET(fte_match_param, match, outer_headers.ip_version, 0xf);
	else
		MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.ethertype);

	if (attr->ip_version == 4) {
		MLX5_SET_TO_ONES(fte_match_param, match,
				 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
	} else if (attr->ip_version == 6) {
		memset(MLX5_ADDR_OF(fte_match_param, match,
				    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       0xff, sizeof(struct in6_addr));
	} else {
		err = -EOPNOTSUPP;
		goto out;
	}

	MLX5_SET_TO_ONES(fte_match_param, match, misc_parameters.vxlan_vni);
	MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0,
		 mlx5_eswitch_get_vport_metadata_mask());
	MLX5_SET(create_flow_group_in, in, start_flow_index, 0);
	MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX);
	e->recirc_grp = mlx5_create_flow_group(e->ft, in);
	if (IS_ERR(e->recirc_grp)) {
		err = PTR_ERR(e->recirc_grp);
		goto out;
	}

	INIT_LIST_HEAD(&e->recirc_rules);
	e->recirc_cnt = 0;

out:
	kvfree(in);
	return err;
}
334 
/* Create the forward group and its single catch-all rule at the last
 * index of entry @e's table.  The rule has an empty match (matches any
 * packet not caught by the recirc group) and forwards to the entry's
 * vport on this device.
 *
 * On rule-creation failure the just-created group is destroyed here, so
 * the caller only needs to unwind earlier construction steps.
 *
 * Returns 0 on success or a negative errno.
 */
static int mlx5_create_indir_fwd_group(struct mlx5_eswitch *esw,
				       struct mlx5_esw_indir_table_entry *e)
{
	int err = 0, inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	struct mlx5_flow_destination dest = {};
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_spec *spec;
	u32 *in;

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	/* Zeroed spec: the fwd rule matches everything. */
	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec) {
		kvfree(in);
		return -ENOMEM;
	}

	/* Hold one entry */
	MLX5_SET(create_flow_group_in, in, start_flow_index, MLX5_ESW_INDIR_TABLE_FWD_IDX);
	MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_ESW_INDIR_TABLE_FWD_IDX);
	e->fwd_grp = mlx5_create_flow_group(e->ft, in);
	if (IS_ERR(e->fwd_grp)) {
		err = PTR_ERR(e->fwd_grp);
		goto err_out;
	}

	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
	dest.vport.num = e->vport;
	dest.vport.vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id);
	dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID;
	e->fwd_rule = mlx5_add_flow_rules(e->ft, spec, &flow_act, &dest, 1);
	if (IS_ERR(e->fwd_rule)) {
		mlx5_destroy_flow_group(e->fwd_grp);
		err = PTR_ERR(e->fwd_rule);
	}

err_out:
	kvfree(spec);
	kvfree(in);
	return err;
}
379 
/* Create a new indirection table entry for (@vport, attr->ip_version):
 * an unmanaged level-1 FDB table in the TC offload prio, its recirc and
 * fwd groups, and — when @decap is set — the first recirc rule for this
 * flow's tunnel.  The entry starts with fwd_ref = 1 for a non-decap
 * user, or 0 for a decap user (whose reference lives in recirc_cnt).
 * On success the entry is inserted into the hashtable; caller must hold
 * the indir table lock.
 *
 * Returns the new entry or an ERR_PTR.
 */
static struct mlx5_esw_indir_table_entry *
mlx5_esw_indir_table_entry_create(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr,
				  struct mlx5_flow_spec *spec, u16 vport, bool decap)
{
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_namespace *root_ns;
	struct mlx5_esw_indir_table_entry *e;
	struct mlx5_flow_table *ft;
	int err = 0;

	root_ns = mlx5_get_flow_namespace(esw->dev, MLX5_FLOW_NAMESPACE_FDB);
	if (!root_ns)
		return ERR_PTR(-ENOENT);

	e = kzalloc(sizeof(*e), GFP_KERNEL);
	if (!e)
		return ERR_PTR(-ENOMEM);

	/* Unmanaged table: placed explicitly at level 1 of the TC
	 * offload prio rather than chained by fs_core.
	 */
	ft_attr.prio = FDB_TC_OFFLOAD;
	ft_attr.max_fte = MLX5_ESW_INDIR_TABLE_SIZE;
	ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
	ft_attr.level = 1;

	ft = mlx5_create_flow_table(root_ns, &ft_attr);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		goto tbl_err;
	}
	e->ft = ft;
	e->vport = vport;
	e->ip_version = attr->ip_version;
	e->fwd_ref = !decap;

	err = mlx5_create_indir_recirc_group(esw, attr, spec, e);
	if (err)
		goto recirc_grp_err;

	if (decap) {
		err = mlx5_esw_indir_table_rule_get(esw, attr, spec, e);
		if (err)
			goto recirc_rule_err;
	}

	err = mlx5_create_indir_fwd_group(esw, e);
	if (err)
		goto fwd_grp_err;

	/* Key layout must match mlx5_esw_indir_table_entry_lookup(). */
	hash_add(esw->fdb_table.offloads.indir->table, &e->hlist,
		 vport << 16 | attr->ip_version);

	return e;

	/* Unwind in reverse order of construction. */
fwd_grp_err:
	if (decap)
		mlx5_esw_indir_table_rule_put(esw, attr, e);
recirc_rule_err:
	mlx5_destroy_flow_group(e->recirc_grp);
recirc_grp_err:
	mlx5_destroy_flow_table(e->ft);
tbl_err:
	kfree(e);
	return ERR_PTR(err);
}
443 
444 static struct mlx5_esw_indir_table_entry *
445 mlx5_esw_indir_table_entry_lookup(struct mlx5_eswitch *esw, u16 vport, u8 ip_version)
446 {
447 	struct mlx5_esw_indir_table_entry *e;
448 	u32 key = vport << 16 | ip_version;
449 
450 	hash_for_each_possible(esw->fdb_table.offloads.indir->table, e, hlist, key)
451 		if (e->vport == vport && e->ip_version == ip_version)
452 			return e;
453 
454 	return NULL;
455 }
456 
/* Get (creating if needed) the indirection flow table for this flow's
 * (vport, IP version) pair and take a reference on it: fwd_ref for a
 * non-decap user, or a recirc rule reference (via rule_get) for a decap
 * user.  Pair each successful call with mlx5_esw_indir_table_put().
 *
 * Returns the flow table or an ERR_PTR.  Serialized by the indir table
 * lock; returning e->ft after unlock is safe because the reference just
 * taken keeps the entry alive.
 */
struct mlx5_flow_table *mlx5_esw_indir_table_get(struct mlx5_eswitch *esw,
						 struct mlx5_flow_attr *attr,
						 struct mlx5_flow_spec *spec,
						 u16 vport, bool decap)
{
	struct mlx5_esw_indir_table_entry *e;
	int err;

	mutex_lock(&esw->fdb_table.offloads.indir->lock);
	e = mlx5_esw_indir_table_entry_lookup(esw, vport, attr->ip_version);
	if (e) {
		if (!decap) {
			e->fwd_ref++;
		} else {
			err = mlx5_esw_indir_table_rule_get(esw, attr, spec, e);
			if (err)
				goto out_err;
		}
	} else {
		e = mlx5_esw_indir_table_entry_create(esw, attr, spec, vport, decap);
		if (IS_ERR(e)) {
			err = PTR_ERR(e);
			esw_warn(esw->dev, "Failed to create indirection table, err %d.\n", err);
			goto out_err;
		}
	}
	mutex_unlock(&esw->fdb_table.offloads.indir->lock);
	return e->ft;

out_err:
	mutex_unlock(&esw->fdb_table.offloads.indir->lock);
	return ERR_PTR(err);
}
490 
/* Release a reference taken by mlx5_esw_indir_table_get() on the entry
 * for (vport, attr->ip_version): fwd_ref for a non-decap user, the
 * matching recirc rule reference for a decap user.  When both reference
 * counts reach zero the entry is unhashed and all its steering objects
 * are destroyed.  No-op if no matching entry exists.
 */
void mlx5_esw_indir_table_put(struct mlx5_eswitch *esw,
			      struct mlx5_flow_attr *attr,
			      u16 vport, bool decap)
{
	struct mlx5_esw_indir_table_entry *e;

	mutex_lock(&esw->fdb_table.offloads.indir->lock);
	e = mlx5_esw_indir_table_entry_lookup(esw, vport, attr->ip_version);
	if (!e)
		goto out;

	if (!decap)
		e->fwd_ref--;
	else
		mlx5_esw_indir_table_rule_put(esw, attr, e);

	/* Still referenced by another user — keep the table. */
	if (e->fwd_ref || e->recirc_cnt)
		goto out;

	/* Tear down in reverse order of creation. */
	hash_del(&e->hlist);
	mlx5_destroy_flow_group(e->recirc_grp);
	mlx5_del_flow_rules(e->fwd_rule);
	mlx5_destroy_flow_group(e->fwd_grp);
	mlx5_destroy_flow_table(e->ft);
	kfree(e);
out:
	mutex_unlock(&esw->fdb_table.offloads.indir->lock);
}
519