/*
 * net/sched/cls_api.c	Packet classifier API.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 *
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>

/* The list of all installed classifier types */
static LIST_HEAD(tcf_proto_base);

/* Protects the list of registered TC modules. It is a pure SMP lock. */
static DEFINE_RWLOCK(cls_mod_lock);

/* Find a classifier type by string name */

static const struct tcf_proto_ops *tcf_proto_lookup_ops(const char *kind)
{
	const struct tcf_proto_ops *t, *res = NULL;

	if (kind) {
		read_lock(&cls_mod_lock);
		list_for_each_entry(t, &tcf_proto_base, head) {
			if (strcmp(kind, t->kind) == 0) {
				if (try_module_get(t->owner))
					res = t;
				break;
			}
		}
		read_unlock(&cls_mod_lock);
	}
	return res;
}

/* Register (unregister) a new classifier type */

int register_tcf_proto_ops(struct tcf_proto_ops *ops)
{
	struct tcf_proto_ops *t;
	int rc = -EEXIST;

	write_lock(&cls_mod_lock);
	list_for_each_entry(t, &tcf_proto_base, head)
		if (!strcmp(ops->kind, t->kind))
			goto out;

	list_add_tail(&ops->head, &tcf_proto_base);
	rc = 0;
out:
	write_unlock(&cls_mod_lock);
	return rc;
}
EXPORT_SYMBOL(register_tcf_proto_ops);

static struct workqueue_struct *tc_filter_wq;

int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
{
	struct tcf_proto_ops *t;
	int rc = -ENOENT;

	/* Wait for outstanding call_rcu()s, if any, from a
	 * tcf_proto_ops's destroy() handler.
	 */
	rcu_barrier();
	flush_workqueue(tc_filter_wq);

	write_lock(&cls_mod_lock);
	list_for_each_entry(t, &tcf_proto_base, head) {
		if (t == ops) {
			list_del(&t->head);
			rc = 0;
			break;
		}
	}
	write_unlock(&cls_mod_lock);
	return rc;
}
EXPORT_SYMBOL(unregister_tcf_proto_ops);
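
/* Example (illustrative sketch, not part of this file): a classifier
 * module registers its tcf_proto_ops on load and unregisters on unload.
 * "foo" and the foo_* callbacks are hypothetical; see cls_basic.c or
 * cls_u32.c for real implementations.
 *
 *	static struct tcf_proto_ops cls_foo_ops __read_mostly = {
 *		.kind		= "foo",
 *		.classify	= foo_classify,
 *		.init		= foo_init,
 *		.destroy	= foo_destroy,
 *		.get		= foo_get,
 *		.change		= foo_change,
 *		.delete		= foo_delete,
 *		.walk		= foo_walk,
 *		.dump		= foo_dump,
 *		.owner		= THIS_MODULE,
 *	};
 *
 *	static int __init foo_module_init(void)
 *	{
 *		return register_tcf_proto_ops(&cls_foo_ops);
 *	}
 *
 *	static void __exit foo_module_exit(void)
 *	{
 *		unregister_tcf_proto_ops(&cls_foo_ops);
 *	}
 *
 *	module_init(foo_module_init);
 *	module_exit(foo_module_exit);
 */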

bool tcf_queue_work(struct work_struct *work)
{
	return queue_work(tc_filter_wq, work);
}
EXPORT_SYMBOL(tcf_queue_work);

/* Select a new prio value from the range managed by the kernel. */

static inline u32 tcf_auto_prio(struct tcf_proto *tp)
{
	u32 first = TC_H_MAKE(0xC0000000U, 0U);

	if (tp)
		first = tp->prio - 1;

	return TC_H_MAJ(first);
}
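
/* Worked example (values only): with the priority kept in the upper
 * 16 bits, as tc_ctl_tfilter() below arranges:
 *
 *	tcf_auto_prio(NULL) = TC_H_MAJ(0xC0000000) = 0xC0000000
 *	tcf_auto_prio(tp)   = TC_H_MAJ(0x80000000 - 1) = 0x7FFF0000
 *	                      (for tp->prio == 0x80000000)
 *
 * The auto-allocated priority sits one major step below tp's, so the
 * new filter is spliced in just before tp and matched earlier.
 */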

static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
					  u32 prio, u32 parent, struct Qdisc *q,
					  struct tcf_chain *chain)
{
	struct tcf_proto *tp;
	int err;

	tp = kzalloc(sizeof(*tp), GFP_KERNEL);
	if (!tp)
		return ERR_PTR(-ENOBUFS);

	err = -ENOENT;
	tp->ops = tcf_proto_lookup_ops(kind);
	if (!tp->ops) {
#ifdef CONFIG_MODULES
		rtnl_unlock();
		request_module("cls_%s", kind);
		rtnl_lock();
		tp->ops = tcf_proto_lookup_ops(kind);
		/* We dropped the RTNL semaphore in order to perform
		 * the module load. So, even if we succeeded in loading
		 * the module, we have to replay the request. We indicate
		 * this using -EAGAIN.
		 */
		if (tp->ops) {
			module_put(tp->ops->owner);
			err = -EAGAIN;
		} else {
			err = -ENOENT;
		}
#endif
		goto errout;
	}
	tp->classify = tp->ops->classify;
	tp->protocol = protocol;
	tp->prio = prio;
	tp->classid = parent;
	tp->q = q;
	tp->chain = chain;

	err = tp->ops->init(tp);
	if (err) {
		module_put(tp->ops->owner);
		goto errout;
	}
	return tp;

errout:
	kfree(tp);
	return ERR_PTR(err);
}

static void tcf_proto_destroy(struct tcf_proto *tp)
{
	tp->ops->destroy(tp);
	module_put(tp->ops->owner);
	kfree_rcu(tp, rcu);
}

static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
					  u32 chain_index)
{
	struct tcf_chain *chain;

	chain = kzalloc(sizeof(*chain), GFP_KERNEL);
	if (!chain)
		return NULL;
	list_add_tail(&chain->list, &block->chain_list);
	chain->block = block;
	chain->index = chain_index;
	chain->refcnt = 1;
	return chain;
}

static void tcf_chain_flush(struct tcf_chain *chain)
{
	struct tcf_proto *tp;

	if (chain->p_filter_chain)
		RCU_INIT_POINTER(*chain->p_filter_chain, NULL);
	while ((tp = rtnl_dereference(chain->filter_chain)) != NULL) {
		RCU_INIT_POINTER(chain->filter_chain, tp->next);
		tcf_chain_put(chain);
		tcf_proto_destroy(tp);
	}
}

static void tcf_chain_destroy(struct tcf_chain *chain)
{
	list_del(&chain->list);
	kfree(chain);
}

static void tcf_chain_hold(struct tcf_chain *chain)
{
	++chain->refcnt;
}

struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
				bool create)
{
	struct tcf_chain *chain;

	list_for_each_entry(chain, &block->chain_list, list) {
		if (chain->index == chain_index) {
			tcf_chain_hold(chain);
			return chain;
		}
	}

	return create ? tcf_chain_create(block, chain_index) : NULL;
}
EXPORT_SYMBOL(tcf_chain_get);

void tcf_chain_put(struct tcf_chain *chain)
{
	if (--chain->refcnt == 0)
		tcf_chain_destroy(chain);
}
EXPORT_SYMBOL(tcf_chain_put);
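
/* Example (illustrative sketch, not part of this file): the goto_chain
 * action pins its target chain with this pair, roughly as act_api.c
 * does when initializing TC_ACT_GOTO_CHAIN (error handling trimmed):
 *
 *	a->goto_chain = tcf_chain_get(tp->chain->block, chain_index, true);
 *	if (!a->goto_chain)
 *		return -ENOMEM;
 *
 * and drops the reference with tcf_chain_put(a->goto_chain) when the
 * action is freed. create=true means a missing chain is materialized
 * on demand rather than treated as an error.
 */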

static void
tcf_chain_filter_chain_ptr_set(struct tcf_chain *chain,
			       struct tcf_proto __rcu **p_filter_chain)
{
	chain->p_filter_chain = p_filter_chain;
}

int tcf_block_get(struct tcf_block **p_block,
		  struct tcf_proto __rcu **p_filter_chain)
{
	struct tcf_block *block = kzalloc(sizeof(*block), GFP_KERNEL);
	struct tcf_chain *chain;
	int err;

	if (!block)
		return -ENOMEM;
	INIT_LIST_HEAD(&block->chain_list);
	/* Create chain 0 by default; it always has to be present. */
	chain = tcf_chain_create(block, 0);
	if (!chain) {
		err = -ENOMEM;
		goto err_chain_create;
	}
	tcf_chain_filter_chain_ptr_set(chain, p_filter_chain);
	*p_block = block;
	return 0;

err_chain_create:
	kfree(block);
	return err;
}
EXPORT_SYMBOL(tcf_block_get);

static void tcf_block_put_final(struct work_struct *work)
{
	struct tcf_block *block = container_of(work, struct tcf_block, work);
	struct tcf_chain *chain, *tmp;

	/* At this point, all the chains should have refcnt == 1. */
	rtnl_lock();
	list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
		tcf_chain_put(chain);
	rtnl_unlock();
	kfree(block);
}

/* XXX: Standalone actions are not allowed to jump to any chain, and bound
 * actions should all be removed after flushing. However, because filters
 * are destroyed in RCU callbacks, we have to hold the chains first;
 * otherwise we would always race with RCU callbacks on this list without
 * proper locking.
 */
static void tcf_block_put_deferred(struct work_struct *work)
{
	struct tcf_block *block = container_of(work, struct tcf_block, work);
	struct tcf_chain *chain;

	rtnl_lock();
	/* Hold a refcnt for all chains except 0, in case they go away. */
	list_for_each_entry(chain, &block->chain_list, list)
		if (chain->index)
			tcf_chain_hold(chain);

	/* No race on the list, because no chain could be destroyed. */
	list_for_each_entry(chain, &block->chain_list, list)
		tcf_chain_flush(chain);

	INIT_WORK(&block->work, tcf_block_put_final);
	/* Wait for RCU callbacks to release the reference count and make
	 * sure their work items have been queued before this.
	 */
	rcu_barrier();
	tcf_queue_work(&block->work);
	rtnl_unlock();
}

void tcf_block_put(struct tcf_block *block)
{
	if (!block)
		return;

	INIT_WORK(&block->work, tcf_block_put_deferred);
	/* Wait for existing RCU callbacks to cool down, and make sure their
	 * work items have been queued before this. We cannot flush pending
	 * work here because we hold the RTNL lock.
	 */
	rcu_barrier();
	tcf_queue_work(&block->work);
}
EXPORT_SYMBOL(tcf_block_put);
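
/* Example (illustrative sketch, not part of this file): a qdisc owns one
 * block for its lifetime and ties chain 0 to its RCU filter list pointer.
 * Loosely modeled on sch_ingress.c, trimmed to the block calls:
 *
 *	static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
 *	{
 *		struct ingress_sched_data *q = qdisc_priv(sch);
 *		struct net_device *dev = qdisc_dev(sch);
 *
 *		return tcf_block_get(&q->block, &dev->ingress_cl_list);
 *	}
 *
 *	static void ingress_destroy(struct Qdisc *sch)
 *	{
 *		struct ingress_sched_data *q = qdisc_priv(sch);
 *
 *		tcf_block_put(q->block);
 *	}
 */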

/* Main classifier routine: scans the classifier chain attached to this
 * qdisc, (optionally) tests for the protocol, and asks the specific
 * classifiers.
 */
int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
		 struct tcf_result *res, bool compat_mode)
{
	__be16 protocol = tc_skb_protocol(skb);
#ifdef CONFIG_NET_CLS_ACT
	const int max_reclassify_loop = 4;
	const struct tcf_proto *orig_tp = tp;
	const struct tcf_proto *first_tp;
	int limit = 0;

reclassify:
#endif
	for (; tp; tp = rcu_dereference_bh(tp->next)) {
		int err;

		if (tp->protocol != protocol &&
		    tp->protocol != htons(ETH_P_ALL))
			continue;

		err = tp->classify(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
		if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
			first_tp = orig_tp;
			goto reset;
		} else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
			first_tp = res->goto_tp;
			goto reset;
		}
#endif
		if (err >= 0)
			return err;
	}

	return TC_ACT_UNSPEC; /* signal: continue lookup */
#ifdef CONFIG_NET_CLS_ACT
reset:
	if (unlikely(limit++ >= max_reclassify_loop)) {
		net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n",
				       tp->q->ops->id, tp->prio & 0xffff,
				       ntohs(tp->protocol));
		return TC_ACT_SHOT;
	}

	tp = first_tp;
	protocol = tc_skb_protocol(skb);
	goto reclassify;
#endif
}
EXPORT_SYMBOL(tcf_classify);
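
/* Example (illustrative sketch, not part of this file): a classful qdisc
 * typically classifies in its enqueue path like this, modeled on
 * sch_prio's prio_classify(); q->filter_list is that qdisc's chain 0
 * head and *qerr is the drop code handed back to the caller:
 *
 *	struct tcf_result res;
 *	struct tcf_proto *fl = rcu_dereference_bh(q->filter_list);
 *	int err = tcf_classify(skb, fl, &res, false);
 *
 *	#ifdef CONFIG_NET_CLS_ACT
 *	switch (err) {
 *	case TC_ACT_STOLEN:
 *	case TC_ACT_QUEUED:
 *	case TC_ACT_TRAP:
 *		*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 *		(deliberate fall-through)
 *	case TC_ACT_SHOT:
 *		return NULL;
 *	}
 *	#endif
 *
 * On success, TC_H_MIN(res.classid) selects the band/class.
 */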

struct tcf_chain_info {
	struct tcf_proto __rcu **pprev;
	struct tcf_proto __rcu *next;
};

static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain_info *chain_info)
{
	return rtnl_dereference(*chain_info->pprev);
}

static void tcf_chain_tp_insert(struct tcf_chain *chain,
				struct tcf_chain_info *chain_info,
				struct tcf_proto *tp)
{
	if (chain->p_filter_chain &&
	    *chain_info->pprev == chain->filter_chain)
		rcu_assign_pointer(*chain->p_filter_chain, tp);
	RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info));
	rcu_assign_pointer(*chain_info->pprev, tp);
	tcf_chain_hold(chain);
}

static void tcf_chain_tp_remove(struct tcf_chain *chain,
				struct tcf_chain_info *chain_info,
				struct tcf_proto *tp)
{
	struct tcf_proto *next = rtnl_dereference(chain_info->next);

	if (chain->p_filter_chain && tp == chain->filter_chain)
		RCU_INIT_POINTER(*chain->p_filter_chain, next);
	RCU_INIT_POINTER(*chain_info->pprev, next);
	tcf_chain_put(chain);
}

static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
					   struct tcf_chain_info *chain_info,
					   u32 protocol, u32 prio,
					   bool prio_allocate)
{
	struct tcf_proto **pprev;
	struct tcf_proto *tp;

	/* Check the chain for the existence of a proto-tcf with this
	 * priority.
	 */
	for (pprev = &chain->filter_chain;
	     (tp = rtnl_dereference(*pprev)); pprev = &tp->next) {
		if (tp->prio >= prio) {
			if (tp->prio == prio) {
				if (prio_allocate ||
				    (tp->protocol != protocol && protocol))
					return ERR_PTR(-EINVAL);
			} else {
				tp = NULL;
			}
			break;
		}
	}
	chain_info->pprev = pprev;
	chain_info->next = tp ? tp->next : NULL;
	return tp;
}
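
/* Worked example: for a chain tp1 (prio 0x10000) -> tp2 (prio 0x30000),
 * a find for prio 0x20000 walks past tp1, stops at tp2 (the first entry
 * with tp->prio >= prio), and returns NULL with
 * chain_info->pprev == &tp1->next. tcf_chain_tp_insert() then splices
 * the new tp in front of tp2 via *pprev, keeping the list sorted by
 * ascending priority, which is exactly the order tcf_classify() walks.
 */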

static int tcf_fill_node(struct net *net, struct sk_buff *skb,
			 struct tcf_proto *tp, void *fh, u32 portid,
			 u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(tp->q)->ifindex;
	tcm->tcm_parent = tp->classid;
	tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
	if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
		goto nla_put_failure;
	if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
		goto nla_put_failure;
	if (!fh) {
		tcm->tcm_handle = 0;
	} else {
		if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0)
			goto nla_put_failure;
	}
	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int tfilter_notify(struct net *net, struct sk_buff *oskb,
			  struct nlmsghdr *n, struct tcf_proto *tp,
			  void *fh, int event, bool unicast)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq,
			  n->nlmsg_flags, event) <= 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	if (unicast)
		return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			      n->nlmsg_flags & NLM_F_ECHO);
}

static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
			      struct nlmsghdr *n, struct tcf_proto *tp,
			      void *fh, bool unicast, bool *last)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	int err;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq,
			  n->nlmsg_flags, RTM_DELTFILTER) <= 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	err = tp->ops->delete(tp, fh, last);
	if (err) {
		kfree_skb(skb);
		return err;
	}

	if (unicast)
		return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			      n->nlmsg_flags & NLM_F_ECHO);
}

static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
				 struct nlmsghdr *n,
				 struct tcf_chain *chain, int event)
{
	struct tcf_proto *tp;

	for (tp = rtnl_dereference(chain->filter_chain);
	     tp; tp = rtnl_dereference(tp->next))
		tfilter_notify(net, oskb, n, tp, 0, event, false);
}

/* Add/change/delete/get a filter node */

static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
			  struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct tcmsg *t;
	u32 protocol;
	u32 prio;
	bool prio_allocate;
	u32 parent;
	u32 chain_index;
	struct net_device *dev;
	struct Qdisc *q;
	struct tcf_chain_info chain_info;
	struct tcf_chain *chain = NULL;
	struct tcf_block *block;
	struct tcf_proto *tp;
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	void *fh;
	int err;
	int tp_created;

	if ((n->nlmsg_type != RTM_GETTFILTER) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

replay:
	tp_created = 0;

	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack);
	if (err < 0)
		return err;

	t = nlmsg_data(n);
	protocol = TC_H_MIN(t->tcm_info);
	prio = TC_H_MAJ(t->tcm_info);
	prio_allocate = false;
	parent = t->tcm_parent;
	cl = 0;

	if (prio == 0) {
		switch (n->nlmsg_type) {
		case RTM_DELTFILTER:
			if (protocol || t->tcm_handle || tca[TCA_KIND])
				return -ENOENT;
			break;
		case RTM_NEWTFILTER:
			/* If no priority is provided by the user,
			 * we allocate one.
			 */
			if (n->nlmsg_flags & NLM_F_CREATE) {
				prio = TC_H_MAKE(0x80000000U, 0U);
				prio_allocate = true;
				break;
			}
			/* fall-through */
		default:
			return -ENOENT;
		}
	}

	/* Find the head of the filter chain. */

	/* Find link */
	dev = __dev_get_by_index(net, t->tcm_ifindex);
	if (dev == NULL)
		return -ENODEV;

	/* Find qdisc */
	if (!parent) {
		q = dev->qdisc;
		parent = q->handle;
	} else {
		q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent));
		if (q == NULL)
			return -EINVAL;
	}

	/* Is it classful? */
	cops = q->ops->cl_ops;
	if (!cops)
		return -EINVAL;

	if (!cops->tcf_block)
		return -EOPNOTSUPP;

	/* Do we search for a filter attached to a class? */
	if (TC_H_MIN(parent)) {
		cl = cops->find(q, parent);
		if (cl == 0)
			return -ENOENT;
	}

	/* And the last stroke */
	block = cops->tcf_block(q, cl);
	if (!block) {
		err = -EINVAL;
		goto errout;
	}

	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
	if (chain_index > TC_ACT_EXT_VAL_MASK) {
		err = -EINVAL;
		goto errout;
	}
	chain = tcf_chain_get(block, chain_index,
			      n->nlmsg_type == RTM_NEWTFILTER);
	if (!chain) {
		err = n->nlmsg_type == RTM_NEWTFILTER ? -ENOMEM : -EINVAL;
		goto errout;
	}

	if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) {
		tfilter_notify_chain(net, skb, n, chain, RTM_DELTFILTER);
		tcf_chain_flush(chain);
		err = 0;
		goto errout;
	}

	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
			       prio, prio_allocate);
	if (IS_ERR(tp)) {
		err = PTR_ERR(tp);
		goto errout;
	}

	if (tp == NULL) {
		/* Proto-tcf does not exist, create a new one */

		if (tca[TCA_KIND] == NULL || !protocol) {
			err = -EINVAL;
			goto errout;
		}

		if (n->nlmsg_type != RTM_NEWTFILTER ||
		    !(n->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto errout;
		}

		if (prio_allocate)
			prio = tcf_auto_prio(tcf_chain_tp_prev(&chain_info));

		tp = tcf_proto_create(nla_data(tca[TCA_KIND]),
				      protocol, prio, parent, q, chain);
		if (IS_ERR(tp)) {
			err = PTR_ERR(tp);
			goto errout;
		}
		tp_created = 1;
	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
		err = -EINVAL;
		goto errout;
	}

	fh = tp->ops->get(tp, t->tcm_handle);

	if (!fh) {
		if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
			tcf_chain_tp_remove(chain, &chain_info, tp);
			tfilter_notify(net, skb, n, tp, fh,
				       RTM_DELTFILTER, false);
			tcf_proto_destroy(tp);
			err = 0;
			goto errout;
		}

		if (n->nlmsg_type != RTM_NEWTFILTER ||
		    !(n->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto errout;
		}
	} else {
		bool last;

		switch (n->nlmsg_type) {
		case RTM_NEWTFILTER:
			if (n->nlmsg_flags & NLM_F_EXCL) {
				if (tp_created)
					tcf_proto_destroy(tp);
				err = -EEXIST;
				goto errout;
			}
			break;
		case RTM_DELTFILTER:
			err = tfilter_del_notify(net, skb, n, tp, fh, false,
						 &last);
			if (err)
				goto errout;
			if (last) {
				tcf_chain_tp_remove(chain, &chain_info, tp);
				tcf_proto_destroy(tp);
			}
			goto errout;
		case RTM_GETTFILTER:
			err = tfilter_notify(net, skb, n, tp, fh,
					     RTM_NEWTFILTER, true);
			goto errout;
		default:
			err = -EINVAL;
			goto errout;
		}
	}

	err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
			      n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE);
	if (err == 0) {
		if (tp_created)
			tcf_chain_tp_insert(chain, &chain_info, tp);
		tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER, false);
	} else {
		if (tp_created)
			tcf_proto_destroy(tp);
	}

errout:
	if (chain)
		tcf_chain_put(chain);
	if (err == -EAGAIN)
		/* Replay the request. */
		goto replay;
	return err;
}
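
/* Example (illustrative): a userspace request such as
 *
 *	tc filter add dev eth0 parent 1: protocol ip prio 10 \
 *		u32 match ip dst 10.0.0.0/8 classid 1:1
 *
 * reaches this handler as RTM_NEWTFILTER with NLM_F_CREATE | NLM_F_EXCL,
 * tcm_ifindex = eth0's ifindex, tcm_parent = TC_H_MAKE(1 << 16, 0), and
 * tcm_info = TC_H_MAKE(10 << 16, htons(ETH_P_IP)); TCA_KIND carries the
 * string "u32". Leaving out "prio" makes prio_allocate true above, and
 * an as-yet-unloaded "kind" makes tcf_proto_create() return -EAGAIN
 * after the module load, replaying the whole request from the replay
 * label.
 */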

struct tcf_dump_args {
	struct tcf_walker w;
	struct sk_buff *skb;
	struct netlink_callback *cb;
};

static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
{
	struct tcf_dump_args *a = (void *)arg;
	struct net *net = sock_net(a->skb->sk);

	return tcf_fill_node(net, a->skb, tp, n, NETLINK_CB(a->cb->skb).portid,
			     a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
			     RTM_NEWTFILTER);
}

static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb,
			   struct netlink_callback *cb,
			   long index_start, long *p_index)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct tcf_dump_args arg;
	struct tcf_proto *tp;

	for (tp = rtnl_dereference(chain->filter_chain);
	     tp; tp = rtnl_dereference(tp->next), (*p_index)++) {
		if (*p_index < index_start)
			continue;
		if (TC_H_MAJ(tcm->tcm_info) &&
		    TC_H_MAJ(tcm->tcm_info) != tp->prio)
			continue;
		if (TC_H_MIN(tcm->tcm_info) &&
		    TC_H_MIN(tcm->tcm_info) != tp->protocol)
			continue;
		if (*p_index > index_start)
			memset(&cb->args[1], 0,
			       sizeof(cb->args) - sizeof(cb->args[0]));
		if (cb->args[1] == 0) {
			if (tcf_fill_node(net, skb, tp, 0,
					  NETLINK_CB(cb->skb).portid,
					  cb->nlh->nlmsg_seq, NLM_F_MULTI,
					  RTM_NEWTFILTER) <= 0)
				return false;

			cb->args[1] = 1;
		}
		if (!tp->ops->walk)
			continue;
		arg.w.fn = tcf_node_dump;
		arg.skb = skb;
		arg.cb = cb;
		arg.w.stop = 0;
		arg.w.skip = cb->args[1] - 1;
		arg.w.count = 0;
		tp->ops->walk(tp, &arg.w);
		cb->args[1] = arg.w.count + 1;
		if (arg.w.stop)
			return false;
	}
	return true;
}
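
/* Example (illustrative sketch, not part of this file): the walk() op
 * invoked above follows a common skip/count/stop pattern; arg->fn is
 * tcf_node_dump() during a dump. foo_head/foo_filter are hypothetical
 * (compare cls_basic.c):
 *
 *	static void foo_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 *	{
 *		struct foo_head *head = rtnl_dereference(tp->root);
 *		struct foo_filter *f;
 *
 *		list_for_each_entry(f, &head->flist, link) {
 *			if (arg->count < arg->skip)
 *				goto skip;
 *			if (arg->fn(tp, f, arg) < 0) {
 *				arg->stop = 1;
 *				break;
 *			}
 *	skip:
 *			arg->count++;
 *		}
 *	}
 */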

/* called with RTNL */
static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q;
	struct tcf_block *block;
	struct tcf_chain *chain;
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	unsigned long cl = 0;
	const struct Qdisc_class_ops *cops;
	long index_start;
	long index;
	int err;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return skb->len;

	err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL);
	if (err)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return skb->len;

	if (!tcm->tcm_parent)
		q = dev->qdisc;
	else
		q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
	if (!q)
		goto out;
	cops = q->ops->cl_ops;
	if (!cops)
		goto out;
	if (!cops->tcf_block)
		goto out;
	if (TC_H_MIN(tcm->tcm_parent)) {
		cl = cops->find(q, tcm->tcm_parent);
		if (cl == 0)
			goto out;
	}
	block = cops->tcf_block(q, cl);
	if (!block)
		goto out;

	index_start = cb->args[0];
	index = 0;

	list_for_each_entry(chain, &block->chain_list, list) {
		if (tca[TCA_CHAIN] &&
		    nla_get_u32(tca[TCA_CHAIN]) != chain->index)
			continue;
		if (!tcf_chain_dump(chain, skb, cb, index_start, &index))
			break;
	}

	cb->args[0] = index;

out:
	return skb->len;
}

void tcf_exts_destroy(struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	LIST_HEAD(actions);

	ASSERT_RTNL();
	tcf_exts_to_list(exts, &actions);
	tcf_action_destroy(&actions, TCA_ACT_UNBIND);
	kfree(exts->actions);
	exts->nr_actions = 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_destroy);

int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
		      struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr)
{
#ifdef CONFIG_NET_CLS_ACT
	{
		struct tc_action *act;

		if (exts->police && tb[exts->police]) {
			act = tcf_action_init_1(net, tp, tb[exts->police],
						rate_tlv, "police", ovr,
						TCA_ACT_BIND);
			if (IS_ERR(act))
				return PTR_ERR(act);

			act->type = exts->type = TCA_OLD_COMPAT;
			exts->actions[0] = act;
			exts->nr_actions = 1;
		} else if (exts->action && tb[exts->action]) {
			LIST_HEAD(actions);
			int err, i = 0;

			err = tcf_action_init(net, tp, tb[exts->action],
					      rate_tlv, NULL, ovr, TCA_ACT_BIND,
					      &actions);
			if (err)
				return err;
			list_for_each_entry(act, &actions, list)
				exts->actions[i++] = act;
			exts->nr_actions = i;
		}
	}
#else
	if ((exts->action && tb[exts->action]) ||
	    (exts->police && tb[exts->police]))
		return -EOPNOTSUPP;
#endif

	return 0;
}
EXPORT_SYMBOL(tcf_exts_validate);

void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src)
{
#ifdef CONFIG_NET_CLS_ACT
	struct tcf_exts old = *dst;

	*dst = *src;
	tcf_exts_destroy(&old);
#endif
}
EXPORT_SYMBOL(tcf_exts_change);
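
/* Example (illustrative sketch, not part of this file): a classifier's
 * change() op validates actions into a temporary tcf_exts and only swaps
 * it into the live filter once nothing can fail anymore.
 * TCA_FOO_ACT/TCA_FOO_POLICE and f are hypothetical:
 *
 *	struct tcf_exts e;
 *	int err;
 *
 *	err = tcf_exts_init(&e, TCA_FOO_ACT, TCA_FOO_POLICE);
 *	if (err < 0)
 *		return err;
 *	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
 *	if (err < 0)
 *		goto errout;
 *
 *	... configure the new or updated filter f ...
 *
 *	tcf_exts_change(&f->exts, &e);
 *	return 0;
 *
 * errout:
 *	tcf_exts_destroy(&e);
 *	return err;
 */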

#ifdef CONFIG_NET_CLS_ACT
static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts)
{
	if (exts->nr_actions == 0)
		return NULL;
	else
		return exts->actions[0];
}
#endif

int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct nlattr *nest;

	if (exts->action && tcf_exts_has_actions(exts)) {
		/* Again for backward-compatible mode - we want
		 * to work with both old and new modes of entering
		 * tc data even if iproute2 was newer - jhs
		 */
		if (exts->type != TCA_OLD_COMPAT) {
			LIST_HEAD(actions);

			nest = nla_nest_start(skb, exts->action);
			if (nest == NULL)
				goto nla_put_failure;

			tcf_exts_to_list(exts, &actions);
			if (tcf_action_dump(skb, &actions, 0, 0) < 0)
				goto nla_put_failure;
			nla_nest_end(skb, nest);
		} else if (exts->police) {
			struct tc_action *act = tcf_exts_first_act(exts);

			nest = nla_nest_start(skb, exts->police);
			if (nest == NULL || !act)
				goto nla_put_failure;
			if (tcf_action_dump_old(skb, act, 0, 0) < 0)
				goto nla_put_failure;
			nla_nest_end(skb, nest);
		}
	}
	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
#else
	return 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_dump);

int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct tc_action *a = tcf_exts_first_act(exts);

	if (a != NULL && tcf_action_copy_stats(skb, a, 1) < 0)
		return -1;
#endif
	return 0;
}
EXPORT_SYMBOL(tcf_exts_dump_stats);
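
/* Example (illustrative sketch, not part of this file): a classifier's
 * dump() op nests its own attributes together with the extensions, then
 * emits action statistics outside the nest (pattern as in cls_basic.c;
 * f is hypothetical):
 *
 *	nest = nla_nest_start(skb, TCA_OPTIONS);
 *	if (nest == NULL)
 *		goto nla_put_failure;
 *	... classifier-specific attributes for f ...
 *	if (tcf_exts_dump(skb, &f->exts) < 0)
 *		goto nla_put_failure;
 *	nla_nest_end(skb, nest);
 *
 *	if (tcf_exts_dump_stats(skb, &f->exts) < 0)
 *		goto nla_put_failure;
 */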

int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts,
		     struct net_device **hw_dev)
{
#ifdef CONFIG_NET_CLS_ACT
	const struct tc_action *a;
	LIST_HEAD(actions);

	if (!tcf_exts_has_actions(exts))
		return -EINVAL;

	tcf_exts_to_list(exts, &actions);
	list_for_each_entry(a, &actions, list) {
		if (a->ops->get_dev) {
			a->ops->get_dev(a, dev_net(dev), hw_dev);
			break;
		}
	}
	if (*hw_dev)
		return 0;
#endif
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(tcf_exts_get_dev);

static int __init tc_filter_init(void)
{
	tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0);
	if (!tc_filter_wq)
		return -ENOMEM;

	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,
		      tc_dump_tfilter, 0);

	return 0;
}

subsys_initcall(tc_filter_init);
1077