xref: /openbmc/linux/net/sched/act_mirred.c (revision 96d3c5a7d20ec546e44695983fe0508c6f904248)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * net/sched/act_mirred.c	packet mirroring and redirect actions
4  *
5  * Authors:	Jamal Hadi Salim (2002-4)
6  *
7  * TODO: Add ingress support (and socket redirect support)
8  */
9 
10 #include <linux/types.h>
11 #include <linux/kernel.h>
12 #include <linux/string.h>
13 #include <linux/errno.h>
14 #include <linux/skbuff.h>
15 #include <linux/rtnetlink.h>
16 #include <linux/module.h>
17 #include <linux/init.h>
18 #include <linux/gfp.h>
19 #include <linux/if_arp.h>
20 #include <net/net_namespace.h>
21 #include <net/netlink.h>
22 #include <net/dst.h>
23 #include <net/pkt_sched.h>
24 #include <net/pkt_cls.h>
25 #include <linux/tc_act/tc_mirred.h>
26 #include <net/tc_act/tc_mirred.h>
27 #include <net/tc_wrapper.h>
28 
29 static LIST_HEAD(mirred_list);
30 static DEFINE_SPINLOCK(mirred_list_lock);
31 
32 #define MIRRED_NEST_LIMIT    4
33 static DEFINE_PER_CPU(unsigned int, mirred_nest_level);
34 
tcf_mirred_is_act_redirect(int action)35 static bool tcf_mirred_is_act_redirect(int action)
36 {
37 	return action == TCA_EGRESS_REDIR || action == TCA_INGRESS_REDIR;
38 }
39 
tcf_mirred_act_wants_ingress(int action)40 static bool tcf_mirred_act_wants_ingress(int action)
41 {
42 	switch (action) {
43 	case TCA_EGRESS_REDIR:
44 	case TCA_EGRESS_MIRROR:
45 		return false;
46 	case TCA_INGRESS_REDIR:
47 	case TCA_INGRESS_MIRROR:
48 		return true;
49 	default:
50 		BUG();
51 	}
52 }
53 
tcf_mirred_can_reinsert(int action)54 static bool tcf_mirred_can_reinsert(int action)
55 {
56 	switch (action) {
57 	case TC_ACT_SHOT:
58 	case TC_ACT_STOLEN:
59 	case TC_ACT_QUEUED:
60 	case TC_ACT_TRAP:
61 		return true;
62 	}
63 	return false;
64 }
65 
tcf_mirred_dev_dereference(struct tcf_mirred * m)66 static struct net_device *tcf_mirred_dev_dereference(struct tcf_mirred *m)
67 {
68 	return rcu_dereference_protected(m->tcfm_dev,
69 					 lockdep_is_held(&m->tcf_lock));
70 }
71 
tcf_mirred_release(struct tc_action * a)72 static void tcf_mirred_release(struct tc_action *a)
73 {
74 	struct tcf_mirred *m = to_mirred(a);
75 	struct net_device *dev;
76 
77 	spin_lock(&mirred_list_lock);
78 	list_del(&m->tcfm_list);
79 	spin_unlock(&mirred_list_lock);
80 
81 	/* last reference to action, no need to lock */
82 	dev = rcu_dereference_protected(m->tcfm_dev, 1);
83 	netdev_put(dev, &m->tcfm_dev_tracker);
84 }
85 
86 static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
87 	[TCA_MIRRED_PARMS]	= { .len = sizeof(struct tc_mirred) },
88 };
89 
90 static struct tc_action_ops act_mirred_ops;
91 
/* Create a mirred action, or update an existing one, from netlink attributes.
 *
 * @net:    namespace in which the action and the target ifindex are looked up
 * @nla:    nested attributes; must contain TCA_MIRRED_PARMS (struct tc_mirred)
 * @est:    rate estimator attribute handed to the action creation helper
 * @a:      in/out action pointer filled by the idr helpers
 * @tp:     classifier passed to the control-action validation
 * @flags:  TCA_ACT_FLAGS_* (BIND and REPLACE are examined here)
 * @extack: extended ack used for error messages
 *
 * Returns ACT_P_CREATED when a new action was instantiated, 0 when the
 * request only bound to an existing action, the non-negative result left by
 * attribute parsing when an existing action was replaced, or a negative
 * errno on failure.
 */
static int tcf_mirred_init(struct net *net, struct nlattr *nla,
			   struct nlattr *est, struct tc_action **a,
			   struct tcf_proto *tp,
			   u32 flags, struct netlink_ext_ack *extack)
{
	struct tc_action_net *tn = net_generic(net, act_mirred_ops.net_id);
	const bool bind = flags & TCA_ACT_FLAGS_BIND;
	struct nlattr *attrs[TCA_MIRRED_MAX + 1];
	struct tcf_chain *chain = NULL;
	struct tcf_mirred *mirred;
	struct tc_mirred *parm;
	bool exists;
	u32 index;
	int ret;
	int err;

	if (!nla) {
		NL_SET_ERR_MSG_MOD(extack, "Mirred requires attributes to be passed");
		return -EINVAL;
	}

	ret = nla_parse_nested_deprecated(attrs, TCA_MIRRED_MAX, nla,
					  mirred_policy, extack);
	if (ret < 0)
		return ret;

	if (!attrs[TCA_MIRRED_PARMS]) {
		NL_SET_ERR_MSG_MOD(extack, "Missing required mirred parameters");
		return -EINVAL;
	}

	parm = nla_data(attrs[TCA_MIRRED_PARMS]);
	index = parm->index;

	err = tcf_idr_check_alloc(tn, &index, a, bind);
	if (err < 0)
		return err;
	exists = err;

	/* Binding to an action that is already there needs no more setup. */
	if (bind && exists)
		return 0;

	if (parm->eaction != TCA_EGRESS_MIRROR &&
	    parm->eaction != TCA_EGRESS_REDIR &&
	    parm->eaction != TCA_INGRESS_REDIR &&
	    parm->eaction != TCA_INGRESS_MIRROR) {
		/* Undo what tcf_idr_check_alloc() did for us. */
		if (exists)
			tcf_idr_release(*a, bind);
		else
			tcf_idr_cleanup(tn, index);
		NL_SET_ERR_MSG_MOD(extack, "Unknown mirred option");
		return -EINVAL;
	}

	if (exists) {
		/* An existing action may only be touched on explicit replace. */
		if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
			tcf_idr_release(*a, bind);
			return -EEXIST;
		}
	} else {
		/* A brand new action must name a target device. */
		if (!parm->ifindex) {
			tcf_idr_cleanup(tn, index);
			NL_SET_ERR_MSG_MOD(extack, "Specified device does not exist");
			return -EINVAL;
		}

		ret = tcf_idr_create_from_flags(tn, index, est, a,
						&act_mirred_ops, bind, flags);
		if (ret) {
			tcf_idr_cleanup(tn, index);
			return ret;
		}
		ret = ACT_P_CREATED;
	}

	mirred = to_mirred(*a);
	/* Make list_del() in the cleanup callback safe even if we fail before
	 * the action is linked on mirred_list.
	 */
	if (ret == ACT_P_CREATED)
		INIT_LIST_HEAD(&mirred->tcfm_list);

	err = tcf_action_check_ctrlact(parm->action, tp, &chain, extack);
	if (err < 0)
		goto release_idr;

	spin_lock_bh(&mirred->tcf_lock);

	if (parm->ifindex) {
		struct net_device *new_dev, *old_dev;
		bool xmit_at_mac;

		new_dev = dev_get_by_index(net, parm->ifindex);
		if (!new_dev) {
			spin_unlock_bh(&mirred->tcf_lock);
			err = -ENODEV;
			goto put_chain;
		}
		xmit_at_mac = dev_is_mac_header_xmit(new_dev);

		/* Publish the new target and let go of the previous one. */
		old_dev = rcu_replace_pointer(mirred->tcfm_dev, new_dev,
					      lockdep_is_held(&mirred->tcf_lock));
		netdev_put(old_dev, &mirred->tcfm_dev_tracker);
		netdev_tracker_alloc(new_dev, &mirred->tcfm_dev_tracker,
				     GFP_ATOMIC);
		mirred->tcfm_mac_header_xmit = xmit_at_mac;
	}

	/* tcf_action_set_ctrlact() hands back a chain (if any) that is
	 * released below once the lock is dropped.
	 */
	chain = tcf_action_set_ctrlact(*a, parm->action, chain);
	mirred->tcfm_eaction = parm->eaction;
	spin_unlock_bh(&mirred->tcf_lock);

	if (chain)
		tcf_chain_put_by_act(chain);

	if (ret == ACT_P_CREATED) {
		spin_lock(&mirred_list_lock);
		list_add(&mirred->tcfm_list, &mirred_list);
		spin_unlock(&mirred_list_lock);
	}

	return ret;

put_chain:
	if (chain)
		tcf_chain_put_by_act(chain);
release_idr:
	tcf_idr_release(*a, bind);
	return err;
}
208 
209 static int
tcf_mirred_forward(bool at_ingress,bool want_ingress,struct sk_buff * skb)210 tcf_mirred_forward(bool at_ingress, bool want_ingress, struct sk_buff *skb)
211 {
212 	int err;
213 
214 	if (!want_ingress)
215 		err = tcf_dev_queue_xmit(skb, dev_queue_xmit);
216 	else if (!at_ingress)
217 		err = netif_rx(skb);
218 	else
219 		err = netif_receive_skb(skb);
220 
221 	return err;
222 }
223 
tcf_mirred_to_dev(struct sk_buff * skb,struct tcf_mirred * m,struct net_device * dev,const bool m_mac_header_xmit,int m_eaction,int retval)224 static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m,
225 			     struct net_device *dev,
226 			     const bool m_mac_header_xmit, int m_eaction,
227 			     int retval)
228 {
229 	struct sk_buff *skb_to_send = skb;
230 	bool want_ingress;
231 	bool is_redirect;
232 	bool expects_nh;
233 	bool at_ingress;
234 	bool dont_clone;
235 	int mac_len;
236 	bool at_nh;
237 	int err;
238 
239 	is_redirect = tcf_mirred_is_act_redirect(m_eaction);
240 	if (unlikely(!(dev->flags & IFF_UP)) || !netif_carrier_ok(dev)) {
241 		net_notice_ratelimited("tc mirred to Houston: device %s is down\n",
242 				       dev->name);
243 		goto err_cant_do;
244 	}
245 
246 	/* we could easily avoid the clone only if called by ingress and clsact;
247 	 * since we can't easily detect the clsact caller, skip clone only for
248 	 * ingress - that covers the TC S/W datapath.
249 	 */
250 	at_ingress = skb_at_tc_ingress(skb);
251 	dont_clone = skb_at_tc_ingress(skb) && is_redirect &&
252 		tcf_mirred_can_reinsert(retval);
253 	if (!dont_clone) {
254 		skb_to_send = skb_clone(skb, GFP_ATOMIC);
255 		if (!skb_to_send)
256 			goto err_cant_do;
257 	}
258 
259 	want_ingress = tcf_mirred_act_wants_ingress(m_eaction);
260 
261 	/* All mirred/redirected skbs should clear previous ct info */
262 	nf_reset_ct(skb_to_send);
263 	if (want_ingress && !at_ingress) /* drop dst for egress -> ingress */
264 		skb_dst_drop(skb_to_send);
265 
266 	expects_nh = want_ingress || !m_mac_header_xmit;
267 	at_nh = skb->data == skb_network_header(skb);
268 	if (at_nh != expects_nh) {
269 		mac_len = at_ingress ? skb->mac_len :
270 			  skb_network_offset(skb);
271 		if (expects_nh) {
272 			/* target device/action expect data at nh */
273 			skb_pull_rcsum(skb_to_send, mac_len);
274 		} else {
275 			/* target device/action expect data at mac */
276 			skb_push_rcsum(skb_to_send, mac_len);
277 		}
278 	}
279 
280 	skb_to_send->skb_iif = skb->dev->ifindex;
281 	skb_to_send->dev = dev;
282 
283 	if (is_redirect) {
284 		if (skb == skb_to_send)
285 			retval = TC_ACT_CONSUMED;
286 
287 		skb_set_redirected(skb_to_send, skb_to_send->tc_at_ingress);
288 
289 		err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send);
290 	} else {
291 		err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send);
292 	}
293 	if (err)
294 		tcf_action_inc_overlimit_qstats(&m->common);
295 
296 	return retval;
297 
298 err_cant_do:
299 	if (is_redirect)
300 		retval = TC_ACT_SHOT;
301 	tcf_action_inc_overlimit_qstats(&m->common);
302 	return retval;
303 }
304 
tcf_mirred_act(struct sk_buff * skb,const struct tc_action * a,struct tcf_result * res)305 TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb,
306 				     const struct tc_action *a,
307 				     struct tcf_result *res)
308 {
309 	struct tcf_mirred *m = to_mirred(a);
310 	int retval = READ_ONCE(m->tcf_action);
311 	unsigned int nest_level;
312 	bool m_mac_header_xmit;
313 	struct net_device *dev;
314 	int m_eaction;
315 
316 	nest_level = __this_cpu_inc_return(mirred_nest_level);
317 	if (unlikely(nest_level > MIRRED_NEST_LIMIT)) {
318 		net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n",
319 				     netdev_name(skb->dev));
320 		retval = TC_ACT_SHOT;
321 		goto dec_nest_level;
322 	}
323 
324 	tcf_lastuse_update(&m->tcf_tm);
325 	tcf_action_update_bstats(&m->common, skb);
326 
327 	dev = rcu_dereference_bh(m->tcfm_dev);
328 	if (unlikely(!dev)) {
329 		pr_notice_once("tc mirred: target device is gone\n");
330 		tcf_action_inc_overlimit_qstats(&m->common);
331 		goto dec_nest_level;
332 	}
333 
334 	m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit);
335 	m_eaction = READ_ONCE(m->tcfm_eaction);
336 
337 	retval = tcf_mirred_to_dev(skb, m, dev, m_mac_header_xmit, m_eaction,
338 				   retval);
339 
340 dec_nest_level:
341 	__this_cpu_dec(mirred_nest_level);
342 
343 	return retval;
344 }
345 
/* stats_update callback: fold externally supplied counters into the action
 * and move its last-use stamp forward if @lastuse is more recent.
 */
static void tcf_stats_update(struct tc_action *a, u64 bytes, u64 packets,
			     u64 drops, u64 lastuse, bool hw)
{
	struct tcf_t *stamp = &to_mirred(a)->tcf_tm;

	tcf_action_update_stats(a, bytes, packets, drops, hw);
	stamp->lastuse = max_t(u64, stamp->lastuse, lastuse);
}
355 
tcf_mirred_dump(struct sk_buff * skb,struct tc_action * a,int bind,int ref)356 static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
357 			   int ref)
358 {
359 	unsigned char *b = skb_tail_pointer(skb);
360 	struct tcf_mirred *m = to_mirred(a);
361 	struct tc_mirred opt = {
362 		.index   = m->tcf_index,
363 		.refcnt  = refcount_read(&m->tcf_refcnt) - ref,
364 		.bindcnt = atomic_read(&m->tcf_bindcnt) - bind,
365 	};
366 	struct net_device *dev;
367 	struct tcf_t t;
368 
369 	spin_lock_bh(&m->tcf_lock);
370 	opt.action = m->tcf_action;
371 	opt.eaction = m->tcfm_eaction;
372 	dev = tcf_mirred_dev_dereference(m);
373 	if (dev)
374 		opt.ifindex = dev->ifindex;
375 
376 	if (nla_put(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt))
377 		goto nla_put_failure;
378 
379 	tcf_tm_dump(&t, &m->tcf_tm);
380 	if (nla_put_64bit(skb, TCA_MIRRED_TM, sizeof(t), &t, TCA_MIRRED_PAD))
381 		goto nla_put_failure;
382 	spin_unlock_bh(&m->tcf_lock);
383 
384 	return skb->len;
385 
386 nla_put_failure:
387 	spin_unlock_bh(&m->tcf_lock);
388 	nlmsg_trim(skb, b);
389 	return -1;
390 }
391 
mirred_device_event(struct notifier_block * unused,unsigned long event,void * ptr)392 static int mirred_device_event(struct notifier_block *unused,
393 			       unsigned long event, void *ptr)
394 {
395 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
396 	struct tcf_mirred *m;
397 
398 	ASSERT_RTNL();
399 	if (event == NETDEV_UNREGISTER) {
400 		spin_lock(&mirred_list_lock);
401 		list_for_each_entry(m, &mirred_list, tcfm_list) {
402 			spin_lock_bh(&m->tcf_lock);
403 			if (tcf_mirred_dev_dereference(m) == dev) {
404 				netdev_put(dev, &m->tcfm_dev_tracker);
405 				/* Note : no rcu grace period necessary, as
406 				 * net_device are already rcu protected.
407 				 */
408 				RCU_INIT_POINTER(m->tcfm_dev, NULL);
409 			}
410 			spin_unlock_bh(&m->tcf_lock);
411 		}
412 		spin_unlock(&mirred_list_lock);
413 	}
414 
415 	return NOTIFY_DONE;
416 }
417 
418 static struct notifier_block mirred_device_notifier = {
419 	.notifier_call = mirred_device_event,
420 };
421 
/* Destructor handed out by tcf_mirred_get_dev(): @priv is the net_device
 * whose reference is to be dropped.
 */
static void tcf_mirred_dev_put(void *priv)
{
	dev_put(priv);
}
428 
429 static struct net_device *
tcf_mirred_get_dev(const struct tc_action * a,tc_action_priv_destructor * destructor)430 tcf_mirred_get_dev(const struct tc_action *a,
431 		   tc_action_priv_destructor *destructor)
432 {
433 	struct tcf_mirred *m = to_mirred(a);
434 	struct net_device *dev;
435 
436 	rcu_read_lock();
437 	dev = rcu_dereference(m->tcfm_dev);
438 	if (dev) {
439 		dev_hold(dev);
440 		*destructor = tcf_mirred_dev_put;
441 	}
442 	rcu_read_unlock();
443 
444 	return dev;
445 }
446 
tcf_mirred_get_fill_size(const struct tc_action * act)447 static size_t tcf_mirred_get_fill_size(const struct tc_action *act)
448 {
449 	return nla_total_size(sizeof(struct tc_mirred));
450 }
451 
tcf_offload_mirred_get_dev(struct flow_action_entry * entry,const struct tc_action * act)452 static void tcf_offload_mirred_get_dev(struct flow_action_entry *entry,
453 				       const struct tc_action *act)
454 {
455 	entry->dev = act->ops->get_dev(act, &entry->destructor);
456 	if (!entry->dev)
457 		return;
458 	entry->destructor_priv = entry->dev;
459 }
460 
/* offload_act_setup callback: translate the mirred action into a flow
 * offload action id.
 *
 * With @bind set, @entry_data is a struct flow_action_entry: its id and
 * device are filled in and *@index_inc is set to 1. Otherwise @entry_data is
 * a struct flow_offload_action and only its id is set.
 *
 * Returns 0, or -EOPNOTSUPP when the action matches none of the four known
 * mirred modes (an extack message is set only in the @bind case).
 */
static int tcf_mirred_offload_act_setup(struct tc_action *act, void *entry_data,
					u32 *index_inc, bool bind,
					struct netlink_ext_ack *extack)
{
	enum flow_action_id id;

	if (is_tcf_mirred_egress_redirect(act)) {
		id = FLOW_ACTION_REDIRECT;
	} else if (is_tcf_mirred_egress_mirror(act)) {
		id = FLOW_ACTION_MIRRED;
	} else if (is_tcf_mirred_ingress_redirect(act)) {
		id = FLOW_ACTION_REDIRECT_INGRESS;
	} else if (is_tcf_mirred_ingress_mirror(act)) {
		id = FLOW_ACTION_MIRRED_INGRESS;
	} else {
		if (bind) {
			NL_SET_ERR_MSG_MOD(extack, "Unsupported mirred offload");
		}
		return -EOPNOTSUPP;
	}

	if (bind) {
		struct flow_action_entry *entry = entry_data;

		entry->id = id;
		tcf_offload_mirred_get_dev(entry, act);
		*index_inc = 1;
	} else {
		struct flow_offload_action *fl_action = entry_data;

		fl_action->id = id;
	}

	return 0;
}
502 
503 static struct tc_action_ops act_mirred_ops = {
504 	.kind		=	"mirred",
505 	.id		=	TCA_ID_MIRRED,
506 	.owner		=	THIS_MODULE,
507 	.act		=	tcf_mirred_act,
508 	.stats_update	=	tcf_stats_update,
509 	.dump		=	tcf_mirred_dump,
510 	.cleanup	=	tcf_mirred_release,
511 	.init		=	tcf_mirred_init,
512 	.get_fill_size	=	tcf_mirred_get_fill_size,
513 	.offload_act_setup =	tcf_mirred_offload_act_setup,
514 	.size		=	sizeof(struct tcf_mirred),
515 	.get_dev	=	tcf_mirred_get_dev,
516 };
517 
mirred_init_net(struct net * net)518 static __net_init int mirred_init_net(struct net *net)
519 {
520 	struct tc_action_net *tn = net_generic(net, act_mirred_ops.net_id);
521 
522 	return tc_action_net_init(net, tn, &act_mirred_ops);
523 }
524 
mirred_exit_net(struct list_head * net_list)525 static void __net_exit mirred_exit_net(struct list_head *net_list)
526 {
527 	tc_action_net_exit(net_list, act_mirred_ops.net_id);
528 }
529 
530 static struct pernet_operations mirred_net_ops = {
531 	.init = mirred_init_net,
532 	.exit_batch = mirred_exit_net,
533 	.id   = &act_mirred_ops.net_id,
534 	.size = sizeof(struct tc_action_net),
535 };
536 
537 MODULE_AUTHOR("Jamal Hadi Salim(2002)");
538 MODULE_DESCRIPTION("Device Mirror/redirect actions");
539 MODULE_LICENSE("GPL");
540 
mirred_init_module(void)541 static int __init mirred_init_module(void)
542 {
543 	int err = register_netdevice_notifier(&mirred_device_notifier);
544 	if (err)
545 		return err;
546 
547 	pr_info("Mirror/redirect action on\n");
548 	err = tcf_register_action(&act_mirred_ops, &mirred_net_ops);
549 	if (err)
550 		unregister_netdevice_notifier(&mirred_device_notifier);
551 
552 	return err;
553 }
554 
mirred_cleanup_module(void)555 static void __exit mirred_cleanup_module(void)
556 {
557 	tcf_unregister_action(&act_mirred_ops, &mirred_net_ops);
558 	unregister_netdevice_notifier(&mirred_device_notifier);
559 }
560 
/* Module entry and exit points. */
module_init(mirred_init_module);
module_exit(mirred_cleanup_module);
563