// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2008, Intel Corporation.
 *
 * Author: Alexander Duyck <alexander.h.duyck@intel.com>
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/dsfield.h>
#include <net/pkt_cls.h>

#include <linux/tc_act/tc_skbedit.h>
#include <net/tc_act/tc_skbedit.h>

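/* Example usage (illustrative only; see tc-skbedit(8) in iproute2 for the
 * authoritative option syntax):
 *
 *   tc filter add dev eth0 parent 1: protocol ip prio 1 u32 \
 *       match ip dst 192.168.0.3/32 \
 *       action skbedit queue_mapping 3
 */
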
static struct tc_action_ops act_skbedit_ops;

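/* Pick the TX queue for @skb: start from the configured queue_mapping and,
 * when SKBEDIT_F_TXQ_SKBHASH is set, spread packets across the configured
 * range by adding the skb hash modulo mapping_mod.  The result is capped to
 * the device's real number of TX queues.
 */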
static u16 tcf_skbedit_hash(struct tcf_skbedit_params *params,
			    struct sk_buff *skb)
{
	u16 queue_mapping = params->queue_mapping;

	if (params->flags & SKBEDIT_F_TXQ_SKBHASH) {
		u32 hash = skb_get_hash(skb);

		queue_mapping += hash % params->mapping_mod;
	}

	return netdev_cap_txqueue(skb->dev, queue_mapping);
}

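/* Per-packet action handler, called in softirq context under RCU: apply the
 * configured edits (priority, DSCP-derived priority, TX queue, firewall mark,
 * packet type) and return the configured control action.
 */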
static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a,
			   struct tcf_result *res)
{
	struct tcf_skbedit *d = to_skbedit(a);
	struct tcf_skbedit_params *params;
	int action;

	tcf_lastuse_update(&d->tcf_tm);
	bstats_update(this_cpu_ptr(d->common.cpu_bstats), skb);

	params = rcu_dereference_bh(d->params);
	action = READ_ONCE(d->tcf_action);

	if (params->flags & SKBEDIT_F_PRIORITY)
		skb->priority = params->priority;
	if (params->flags & SKBEDIT_F_INHERITDSFIELD) {
		int wlen = skb_network_offset(skb);

		switch (skb_protocol(skb, true)) {
		case htons(ETH_P_IP):
			wlen += sizeof(struct iphdr);
			if (!pskb_may_pull(skb, wlen))
				goto err;
			skb->priority = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
			break;

		case htons(ETH_P_IPV6):
			wlen += sizeof(struct ipv6hdr);
			if (!pskb_may_pull(skb, wlen))
				goto err;
			skb->priority = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
			break;
		}
	}
	if (params->flags & SKBEDIT_F_QUEUE_MAPPING &&
	    skb->dev->real_num_tx_queues > params->queue_mapping) {
#ifdef CONFIG_NET_EGRESS
		netdev_xmit_skip_txqueue(true);
#endif
		skb_set_queue_mapping(skb, tcf_skbedit_hash(params, skb));
	}
	if (params->flags & SKBEDIT_F_MARK) {
		skb->mark &= ~params->mask;
		skb->mark |= params->mark & params->mask;
	}
	if (params->flags & SKBEDIT_F_PTYPE)
		skb->pkt_type = params->ptype;
	return action;

err:
	qstats_drop_inc(this_cpu_ptr(d->common.cpu_qstats));
	return TC_ACT_SHOT;
}

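/* Fold counters reported by offloading drivers back into the software
 * statistics and refresh the last-use timestamp.
 */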
static void tcf_skbedit_stats_update(struct tc_action *a, u64 bytes,
				     u64 packets, u64 drops,
				     u64 lastuse, bool hw)
{
	struct tcf_skbedit *d = to_skbedit(a);
	struct tcf_t *tm = &d->tcf_tm;

	tcf_action_update_stats(a, bytes, packets, drops, hw);
	tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}

static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
	[TCA_SKBEDIT_PARMS]		= { .len = sizeof(struct tc_skbedit) },
	[TCA_SKBEDIT_PRIORITY]		= { .len = sizeof(u32) },
	[TCA_SKBEDIT_QUEUE_MAPPING]	= { .len = sizeof(u16) },
	[TCA_SKBEDIT_MARK]		= { .len = sizeof(u32) },
	[TCA_SKBEDIT_PTYPE]		= { .len = sizeof(u16) },
	[TCA_SKBEDIT_MASK]		= { .len = sizeof(u32) },
	[TCA_SKBEDIT_FLAGS]		= { .len = sizeof(u64) },
	[TCA_SKBEDIT_QUEUE_MAPPING_MAX]	= { .len = sizeof(u16) },
};

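/* Parse the netlink attributes, validate the option combination, and create
 * or replace the action instance.  The new parameter block is swapped in
 * under tcf_lock and published via RCU, so tcf_skbedit_act() always sees a
 * complete, consistent set of parameters.
 */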
static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
			    struct nlattr *est, struct tc_action **a,
			    struct tcf_proto *tp, u32 act_flags,
			    struct netlink_ext_ack *extack)
{
	struct tc_action_net *tn = net_generic(net, act_skbedit_ops.net_id);
	bool bind = act_flags & TCA_ACT_FLAGS_BIND;
	struct tcf_skbedit_params *params_new;
	struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
	struct tcf_chain *goto_ch = NULL;
	struct tc_skbedit *parm;
	struct tcf_skbedit *d;
	u32 flags = 0, *priority = NULL, *mark = NULL, *mask = NULL;
	u16 *queue_mapping = NULL, *ptype = NULL;
	u16 mapping_mod = 1;
	bool exists = false;
	int ret = 0, err;
	u32 index;

	if (nla == NULL)
		return -EINVAL;

	err = nla_parse_nested_deprecated(tb, TCA_SKBEDIT_MAX, nla,
					  skbedit_policy, NULL);
	if (err < 0)
		return err;

	if (tb[TCA_SKBEDIT_PARMS] == NULL)
		return -EINVAL;

	if (tb[TCA_SKBEDIT_PRIORITY] != NULL) {
		flags |= SKBEDIT_F_PRIORITY;
		priority = nla_data(tb[TCA_SKBEDIT_PRIORITY]);
	}

	if (tb[TCA_SKBEDIT_QUEUE_MAPPING] != NULL) {
		if (is_tcf_skbedit_ingress(act_flags) &&
		    !(act_flags & TCA_ACT_FLAGS_SKIP_SW)) {
			NL_SET_ERR_MSG_MOD(extack, "\"queue_mapping\" option on receive side is hardware only, use skip_sw");
			return -EOPNOTSUPP;
		}
		flags |= SKBEDIT_F_QUEUE_MAPPING;
		queue_mapping = nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING]);
	}

	if (tb[TCA_SKBEDIT_PTYPE] != NULL) {
		ptype = nla_data(tb[TCA_SKBEDIT_PTYPE]);
		if (!skb_pkt_type_ok(*ptype))
			return -EINVAL;
		flags |= SKBEDIT_F_PTYPE;
	}

	if (tb[TCA_SKBEDIT_MARK] != NULL) {
		flags |= SKBEDIT_F_MARK;
		mark = nla_data(tb[TCA_SKBEDIT_MARK]);
	}

	if (tb[TCA_SKBEDIT_MASK] != NULL) {
		flags |= SKBEDIT_F_MASK;
		mask = nla_data(tb[TCA_SKBEDIT_MASK]);
	}

	if (tb[TCA_SKBEDIT_FLAGS] != NULL) {
		u64 *pure_flags = nla_data(tb[TCA_SKBEDIT_FLAGS]);

		if (*pure_flags & SKBEDIT_F_TXQ_SKBHASH) {
			u16 *queue_mapping_max;

			if (!tb[TCA_SKBEDIT_QUEUE_MAPPING] ||
			    !tb[TCA_SKBEDIT_QUEUE_MAPPING_MAX]) {
				NL_SET_ERR_MSG_MOD(extack, "Missing required range of queue_mapping.");
				return -EINVAL;
			}

			queue_mapping_max =
				nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING_MAX]);
			if (*queue_mapping_max < *queue_mapping) {
				NL_SET_ERR_MSG_MOD(extack, "The range of queue_mapping is invalid, max < min.");
				return -EINVAL;
			}

			mapping_mod = *queue_mapping_max - *queue_mapping + 1;
			flags |= SKBEDIT_F_TXQ_SKBHASH;
		}
		if (*pure_flags & SKBEDIT_F_INHERITDSFIELD)
			flags |= SKBEDIT_F_INHERITDSFIELD;
	}

	parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
	index = parm->index;
	err = tcf_idr_check_alloc(tn, &index, a, bind);
	if (err < 0)
		return err;
	exists = err;
	if (exists && bind)
		return 0;

	if (!flags) {
		if (exists)
			tcf_idr_release(*a, bind);
		else
			tcf_idr_cleanup(tn, index);
		return -EINVAL;
	}

	if (!exists) {
		ret = tcf_idr_create(tn, index, est, a,
				     &act_skbedit_ops, bind, true, act_flags);
		if (ret) {
			tcf_idr_cleanup(tn, index);
			return ret;
		}

		d = to_skbedit(*a);
		ret = ACT_P_CREATED;
	} else {
		d = to_skbedit(*a);
		if (!(act_flags & TCA_ACT_FLAGS_REPLACE)) {
			tcf_idr_release(*a, bind);
			return -EEXIST;
		}
	}
	err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
	if (err < 0)
		goto release_idr;

	params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
	if (unlikely(!params_new)) {
		err = -ENOMEM;
		goto put_chain;
	}

	params_new->flags = flags;
	if (flags & SKBEDIT_F_PRIORITY)
		params_new->priority = *priority;
	if (flags & SKBEDIT_F_QUEUE_MAPPING) {
		params_new->queue_mapping = *queue_mapping;
		params_new->mapping_mod = mapping_mod;
	}
	if (flags & SKBEDIT_F_MARK)
		params_new->mark = *mark;
	if (flags & SKBEDIT_F_PTYPE)
		params_new->ptype = *ptype;
	/* default behaviour is to use all the bits */
	params_new->mask = 0xffffffff;
	if (flags & SKBEDIT_F_MASK)
		params_new->mask = *mask;

	spin_lock_bh(&d->tcf_lock);
	goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
	params_new = rcu_replace_pointer(d->params, params_new,
					 lockdep_is_held(&d->tcf_lock));
	spin_unlock_bh(&d->tcf_lock);
	if (params_new)
		kfree_rcu(params_new, rcu);
	if (goto_ch)
		tcf_chain_put_by_act(goto_ch);

	return ret;
put_chain:
	if (goto_ch)
		tcf_chain_put_by_act(goto_ch);
release_idr:
	tcf_idr_release(*a, bind);
	return err;
}

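/* Dump the action configuration back to user space; tcf_lock keeps the
 * parameter block stable while the attributes are emitted.
 */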
static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
			    int bind, int ref)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tcf_skbedit *d = to_skbedit(a);
	struct tcf_skbedit_params *params;
	struct tc_skbedit opt = {
		.index   = d->tcf_index,
		.refcnt  = refcount_read(&d->tcf_refcnt) - ref,
		.bindcnt = atomic_read(&d->tcf_bindcnt) - bind,
	};
	u64 pure_flags = 0;
	struct tcf_t t;

	spin_lock_bh(&d->tcf_lock);
	params = rcu_dereference_protected(d->params,
					   lockdep_is_held(&d->tcf_lock));
	opt.action = d->tcf_action;

	if (nla_put(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt))
		goto nla_put_failure;
	if ((params->flags & SKBEDIT_F_PRIORITY) &&
	    nla_put_u32(skb, TCA_SKBEDIT_PRIORITY, params->priority))
		goto nla_put_failure;
	if ((params->flags & SKBEDIT_F_QUEUE_MAPPING) &&
	    nla_put_u16(skb, TCA_SKBEDIT_QUEUE_MAPPING, params->queue_mapping))
		goto nla_put_failure;
	if ((params->flags & SKBEDIT_F_MARK) &&
	    nla_put_u32(skb, TCA_SKBEDIT_MARK, params->mark))
		goto nla_put_failure;
	if ((params->flags & SKBEDIT_F_PTYPE) &&
	    nla_put_u16(skb, TCA_SKBEDIT_PTYPE, params->ptype))
		goto nla_put_failure;
	if ((params->flags & SKBEDIT_F_MASK) &&
	    nla_put_u32(skb, TCA_SKBEDIT_MASK, params->mask))
		goto nla_put_failure;
	if (params->flags & SKBEDIT_F_INHERITDSFIELD)
		pure_flags |= SKBEDIT_F_INHERITDSFIELD;
	if (params->flags & SKBEDIT_F_TXQ_SKBHASH) {
		if (nla_put_u16(skb, TCA_SKBEDIT_QUEUE_MAPPING_MAX,
				params->queue_mapping + params->mapping_mod - 1))
			goto nla_put_failure;

		pure_flags |= SKBEDIT_F_TXQ_SKBHASH;
	}
	if (pure_flags != 0 &&
	    nla_put(skb, TCA_SKBEDIT_FLAGS, sizeof(pure_flags), &pure_flags))
		goto nla_put_failure;

	tcf_tm_dump(&t, &d->tcf_tm);
	if (nla_put_64bit(skb, TCA_SKBEDIT_TM, sizeof(t), &t, TCA_SKBEDIT_PAD))
		goto nla_put_failure;
	spin_unlock_bh(&d->tcf_lock);

	return skb->len;

nla_put_failure:
	spin_unlock_bh(&d->tcf_lock);
	nlmsg_trim(skb, b);
	return -1;
}

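/* Release the parameter block when the action is destroyed; kfree_rcu()
 * defers the free until any in-flight readers have finished.
 */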
static void tcf_skbedit_cleanup(struct tc_action *a)
{
	struct tcf_skbedit *d = to_skbedit(a);
	struct tcf_skbedit_params *params;

	params = rcu_dereference_protected(d->params, 1);
	if (params)
		kfree_rcu(params, rcu);
}

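/* Worst-case netlink attribute space needed to dump one skbedit action. */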
static size_t tcf_skbedit_get_fill_size(const struct tc_action *act)
{
	return nla_total_size(sizeof(struct tc_skbedit))
		+ nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_PRIORITY */
		+ nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_QUEUE_MAPPING */
		+ nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_QUEUE_MAPPING_MAX */
		+ nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_MARK */
		+ nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_PTYPE */
		+ nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_MASK */
		+ nla_total_size_64bit(sizeof(u64)); /* TCA_SKBEDIT_FLAGS */
}

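/* Translate the software action into flow_action entries for hardware
 * offload; only mark, ptype, priority and RX queue_mapping are offloadable.
 */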
static int tcf_skbedit_offload_act_setup(struct tc_action *act, void *entry_data,
					 u32 *index_inc, bool bind,
					 struct netlink_ext_ack *extack)
{
	if (bind) {
		struct flow_action_entry *entry = entry_data;

		if (is_tcf_skbedit_mark(act)) {
			entry->id = FLOW_ACTION_MARK;
			entry->mark = tcf_skbedit_mark(act);
		} else if (is_tcf_skbedit_ptype(act)) {
			entry->id = FLOW_ACTION_PTYPE;
			entry->ptype = tcf_skbedit_ptype(act);
		} else if (is_tcf_skbedit_priority(act)) {
			entry->id = FLOW_ACTION_PRIORITY;
			entry->priority = tcf_skbedit_priority(act);
		} else if (is_tcf_skbedit_tx_queue_mapping(act)) {
			NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"queue_mapping\" option is used on transmit side");
			return -EOPNOTSUPP;
		} else if (is_tcf_skbedit_rx_queue_mapping(act)) {
			entry->id = FLOW_ACTION_RX_QUEUE_MAPPING;
			entry->rx_queue = tcf_skbedit_rx_queue_mapping(act);
		} else if (is_tcf_skbedit_inheritdsfield(act)) {
			NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"inheritdsfield\" option is used");
			return -EOPNOTSUPP;
		} else {
			NL_SET_ERR_MSG_MOD(extack, "Unsupported skbedit option offload");
			return -EOPNOTSUPP;
		}
		*index_inc = 1;
	} else {
		struct flow_offload_action *fl_action = entry_data;

		if (is_tcf_skbedit_mark(act))
			fl_action->id = FLOW_ACTION_MARK;
		else if (is_tcf_skbedit_ptype(act))
			fl_action->id = FLOW_ACTION_PTYPE;
		else if (is_tcf_skbedit_priority(act))
			fl_action->id = FLOW_ACTION_PRIORITY;
		else if (is_tcf_skbedit_rx_queue_mapping(act))
			fl_action->id = FLOW_ACTION_RX_QUEUE_MAPPING;
		else
			return -EOPNOTSUPP;
	}

	return 0;
}

static struct tc_action_ops act_skbedit_ops = {
	.kind		=	"skbedit",
	.id		=	TCA_ID_SKBEDIT,
	.owner		=	THIS_MODULE,
	.act		=	tcf_skbedit_act,
	.stats_update	=	tcf_skbedit_stats_update,
	.dump		=	tcf_skbedit_dump,
	.init		=	tcf_skbedit_init,
	.cleanup	=	tcf_skbedit_cleanup,
	.get_fill_size	=	tcf_skbedit_get_fill_size,
	.offload_act_setup =	tcf_skbedit_offload_act_setup,
	.size		=	sizeof(struct tcf_skbedit),
};

static __net_init int skbedit_init_net(struct net *net)
{
	struct tc_action_net *tn = net_generic(net, act_skbedit_ops.net_id);

	return tc_action_net_init(net, tn, &act_skbedit_ops);
}

static void __net_exit skbedit_exit_net(struct list_head *net_list)
{
	tc_action_net_exit(net_list, act_skbedit_ops.net_id);
}

static struct pernet_operations skbedit_net_ops = {
	.init = skbedit_init_net,
	.exit_batch = skbedit_exit_net,
	.id   = &act_skbedit_ops.net_id,
	.size = sizeof(struct tc_action_net),
};

MODULE_AUTHOR("Alexander Duyck, <alexander.h.duyck@intel.com>");
MODULE_DESCRIPTION("SKB Editing");
MODULE_LICENSE("GPL");

static int __init skbedit_init_module(void)
{
	return tcf_register_action(&act_skbedit_ops, &skbedit_net_ops);
}

static void __exit skbedit_cleanup_module(void)
{
	tcf_unregister_action(&act_skbedit_ops, &skbedit_net_ops);
}

module_init(skbedit_init_module);
module_exit(skbedit_cleanup_module);