xref: /openbmc/linux/net/sched/cls_u32.c (revision 1802d0beecafe581ad584634ba92f8a471d8a63a)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * net/sched/cls_u32.c	Ugly (or Universal) 32bit key Packet Classifier.
4  *
5  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
6  *
7  *	The filters are packed to hash tables of key nodes
8  *	with a set of 32bit key/mask pairs at every node.
9  *	Nodes reference next level hash tables etc.
10  *
11  *	This scheme is the best universal classifier I managed to
12  *	invent; it is not super-fast, but it is not slow (provided you
13  *	program it correctly), and general enough.  And its relative
14  *	speed grows as the number of rules becomes larger.
15  *
16  *	It seems that it represents the best middle point between
17  *	speed and manageability both by human and by machine.
18  *
19  *	It is especially useful for link sharing combined with QoS;
20  *	pure RSVP doesn't need such a general approach and can use
21  *	much simpler (and faster) schemes, sort of cls_rsvp.c.
22  *
23  *	JHS: We should remove the CONFIG_NET_CLS_IND from here
24  *	eventually when the meta match extension is made available
25  *
26  *	nfmark match added by Catalin(ux aka Dino) BOIE <catab at umbrella.ro>
27  */
28 
29 #include <linux/module.h>
30 #include <linux/slab.h>
31 #include <linux/types.h>
32 #include <linux/kernel.h>
33 #include <linux/string.h>
34 #include <linux/errno.h>
35 #include <linux/percpu.h>
36 #include <linux/rtnetlink.h>
37 #include <linux/skbuff.h>
38 #include <linux/bitmap.h>
39 #include <linux/netdevice.h>
40 #include <linux/hash.h>
41 #include <net/netlink.h>
42 #include <net/act_api.h>
43 #include <net/pkt_cls.h>
44 #include <linux/idr.h>
45 
/* A key node: one filter, made of a set of 32bit key/mask pairs (sel.keys)
 * plus the result and extensions to apply when all of them match.
 */
struct tc_u_knode {
	struct tc_u_knode __rcu	*next;		/* next knode in the same bucket */
	u32			handle;
	struct tc_u_hnode __rcu	*ht_up;		/* hash table holding this knode */
	struct tcf_exts		exts;
#ifdef CONFIG_NET_CLS_IND
	int			ifindex;	/* checked with tcf_match_indev() */
#endif
	u8			fshift;		/* shift used by u32_hash_fold() */
	struct tcf_result	res;
	struct tc_u_hnode __rcu	*ht_down;	/* linked hash table, or NULL */
#ifdef CONFIG_CLS_U32_PERF
	/* Per-cpu hit counters. Copies made by u32_init_knode() share this
	 * pointer with the original knode.
	 */
	struct tc_u32_pcnt __percpu *pf;
#endif
	u32			flags;
	unsigned int		in_hw_count;
#ifdef CONFIG_CLS_U32_MARK
	u32			val;		/* wanted (skb->mark & mask) */
	u32			mask;
	u32 __percpu		*pcpu_success;	/* shared with copies, like pf */
#endif
	struct rcu_work		rwork;		/* deferred free, see tcf_queue_work() users */
	/* The 'sel' field MUST be the last field in structure to allow for
	 * tc_u32_keys allocated at end of structure.
	 */
	struct tc_u32_sel	sel;
};
73 
/* A hash table node: (divisor + 1) buckets, each heading a list of knodes. */
struct tc_u_hnode {
	struct tc_u_hnode __rcu	*next;		/* next table on tc_u_common::hlist */
	u32			handle;
	u32			prio;
	int			refcnt;
	/* Number of buckets minus one; also used as the bucket index mask
	 * in u32_classify().
	 */
	unsigned int		divisor;
	struct idr		handle_idr;	/* handles of the knodes in this table */
	bool			is_root;	/* set for the table created by u32_init() */
	struct rcu_head		rcu;
	u32			flags;
	/* The 'ht' field MUST be the last field in structure to allow for
	 * more entries allocated at end of structure.
	 */
	struct tc_u_knode __rcu	*ht[1];
};
89 
/* State shared by every u32 tcf_proto whose tc_u_common_ptr() key is the
 * same; looked up through tc_u_common_find().
 */
struct tc_u_common {
	struct tc_u_hnode __rcu	*hlist;		/* all hash tables, via tc_u_hnode::next */
	void			*ptr;		/* lookup key, see tc_u_common_ptr() */
	int			refcnt;		/* taken in u32_init(), dropped in u32_destroy() */
	struct idr		handle_idr;	/* ids of the hash tables */
	struct hlist_node	hnode;		/* link in a tc_u_common_hash bucket */
	long			knodes;		/* number of knodes currently linked */
};
98 
99 static inline unsigned int u32_hash_fold(__be32 key,
100 					 const struct tc_u32_sel *sel,
101 					 u8 fshift)
102 {
103 	unsigned int h = ntohl(key & sel->hmask) >> fshift;
104 
105 	return h;
106 }
107 
/* Classify @skb against the u32 tree of @tp, starting at tp->root.
 *
 * The knodes of the selected bucket are tried in list order. A knode whose
 * keys all match either descends into its linked table (ht_down) or, when it
 * has none, is tested for TC_U32_TERMINAL. On a terminal match *res is copied
 * from the knode and the value of tcf_exts_exec() is returned if it is not
 * negative; otherwise the search continues with the next knode.
 *
 * Returns -1 when nothing matched, when packet data could not be read, or
 * when more than TC_U32_MAXDEPTH tables would have to be nested.
 */
static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp,
			struct tcf_result *res)
{
	/* One entry per linked table entered: the knode we descended from
	 * and the packet offset that was in effect at that point.
	 */
	struct {
		struct tc_u_knode *knode;
		unsigned int	  off;
	} stack[TC_U32_MAXDEPTH];

	struct tc_u_hnode *ht = rcu_dereference_bh(tp->root);
	unsigned int off = skb_network_offset(skb);
	struct tc_u_knode *n;
	int sdepth = 0;
	int off2 = 0;
	int sel = 0;
#ifdef CONFIG_CLS_U32_PERF
	int j;
#endif
	int i, r;

next_ht:
	/* Start scanning bucket 'sel' of the current table */
	n = rcu_dereference_bh(ht->ht[sel]);

next_knode:
	if (n) {
		struct tc_u32_key *key = n->sel.keys;

#ifdef CONFIG_CLS_U32_PERF
		__this_cpu_inc(n->pf->rcnt);
		j = 0;
#endif

		/* Knodes flagged skip_sw are not evaluated in software */
		if (tc_skip_sw(n->flags)) {
			n = rcu_dereference_bh(n->next);
			goto next_knode;
		}

#ifdef CONFIG_CLS_U32_MARK
		if ((skb->mark & n->mask) != n->val) {
			n = rcu_dereference_bh(n->next);
			goto next_knode;
		} else {
			__this_cpu_inc(*n->pcpu_success);
		}
#endif

		/* Every key must match; the first mismatch moves on to the
		 * next knode of the bucket.
		 */
		for (i = n->sel.nkeys; i > 0; i--, key++) {
			int toff = off + key->off + (off2 & key->offmask);
			__be32 *data, hdata;

			if (skb_headroom(skb) + toff > INT_MAX)
				goto out;

			data = skb_header_pointer(skb, toff, 4, &hdata);
			if (!data)
				goto out;
			if ((*data ^ key->val) & key->mask) {
				n = rcu_dereference_bh(n->next);
				goto next_knode;
			}
#ifdef CONFIG_CLS_U32_PERF
			__this_cpu_inc(n->pf->kcnts[j]);
			j++;
#endif
		}

		ht = rcu_dereference_bh(n->ht_down);
		if (!ht) {
			/* Also entered from the POP path below, with n set to
			 * the knode we had descended from.
			 */
check_terminal:
			if (n->sel.flags & TC_U32_TERMINAL) {

				/* Note: *res is written before the indev
				 * check, so it is overwritten even when this
				 * knode is then rejected.
				 */
				*res = n->res;
#ifdef CONFIG_NET_CLS_IND
				if (!tcf_match_indev(skb, n->ifindex)) {
					n = rcu_dereference_bh(n->next);
					goto next_knode;
				}
#endif
#ifdef CONFIG_CLS_U32_PERF
				__this_cpu_inc(n->pf->rhit);
#endif
				r = tcf_exts_exec(skb, &n->exts, res);
				if (r < 0) {
					n = rcu_dereference_bh(n->next);
					goto next_knode;
				}

				return r;
			}
			n = rcu_dereference_bh(n->next);
			goto next_knode;
		}

		/* PUSH */
		if (sdepth >= TC_U32_MAXDEPTH)
			goto deadloop;
		stack[sdepth].knode = n;
		stack[sdepth].off = off;
		sdepth++;

		ht = rcu_dereference_bh(n->ht_down);
		sel = 0;
		if (ht->divisor) {
			__be32 *data, hdata;

			/* Pick the bucket of the linked table from the packet
			 * word at sel.hoff; divisor acts as the index mask.
			 */
			data = skb_header_pointer(skb, off + n->sel.hoff, 4,
						  &hdata);
			if (!data)
				goto out;
			sel = ht->divisor & u32_hash_fold(*data, &n->sel,
							  n->fshift);
		}
		if (!(n->sel.flags & (TC_U32_VAROFFSET | TC_U32_OFFSET | TC_U32_EAT)))
			goto next_ht;

		if (n->sel.flags & (TC_U32_OFFSET | TC_U32_VAROFFSET)) {
			/* "+ 3" here and "&= ~3" below round off2 up to a
			 * multiple of four.
			 */
			off2 = n->sel.off + 3;
			if (n->sel.flags & TC_U32_VAROFFSET) {
				__be16 *data, hdata;

				data = skb_header_pointer(skb,
							  off + n->sel.offoff,
							  2, &hdata);
				if (!data)
					goto out;
				off2 += ntohs(n->sel.offmask & *data) >>
					n->sel.offshift;
			}
			off2 &= ~3;
		}
		if (n->sel.flags & TC_U32_EAT) {
			/* Fold the computed offset into the base offset */
			off += off2;
			off2 = 0;
		}

		/* Only descend while the base offset is inside the packet;
		 * otherwise fall through to POP.
		 */
		if (off < skb->len)
			goto next_ht;
	}

	/* POP */
	if (sdepth--) {
		n = stack[sdepth].knode;
		ht = rcu_dereference_bh(n->ht_up);
		off = stack[sdepth].off;
		goto check_terminal;
	}
out:
	return -1;

deadloop:
	net_warn_ratelimited("cls_u32: dead loop\n");
	return -1;
}
260 
261 static struct tc_u_hnode *u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
262 {
263 	struct tc_u_hnode *ht;
264 
265 	for (ht = rtnl_dereference(tp_c->hlist);
266 	     ht;
267 	     ht = rtnl_dereference(ht->next))
268 		if (ht->handle == handle)
269 			break;
270 
271 	return ht;
272 }
273 
274 static struct tc_u_knode *u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
275 {
276 	unsigned int sel;
277 	struct tc_u_knode *n = NULL;
278 
279 	sel = TC_U32_HASH(handle);
280 	if (sel > ht->divisor)
281 		goto out;
282 
283 	for (n = rtnl_dereference(ht->ht[sel]);
284 	     n;
285 	     n = rtnl_dereference(n->next))
286 		if (n->handle == handle)
287 			break;
288 out:
289 	return n;
290 }
291 
292 
293 static void *u32_get(struct tcf_proto *tp, u32 handle)
294 {
295 	struct tc_u_hnode *ht;
296 	struct tc_u_common *tp_c = tp->data;
297 
298 	if (TC_U32_HTID(handle) == TC_U32_ROOT)
299 		ht = rtnl_dereference(tp->root);
300 	else
301 		ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle));
302 
303 	if (!ht)
304 		return NULL;
305 
306 	if (TC_U32_KEY(handle) == 0)
307 		return ht;
308 
309 	return u32_lookup_key(ht, handle);
310 }
311 
312 /* Protected by rtnl lock */
313 static u32 gen_new_htid(struct tc_u_common *tp_c, struct tc_u_hnode *ptr)
314 {
315 	int id = idr_alloc_cyclic(&tp_c->handle_idr, ptr, 1, 0x7FF, GFP_KERNEL);
316 	if (id < 0)
317 		return 0;
318 	return (id | 0x800U) << 20;
319 }
320 
/* Array of hash buckets holding the tc_u_common instances; indexed by
 * tc_u_hash().
 */
static struct hlist_head *tc_u_common_hash;

/* tc_u_hash() hashes keys down to U32_HASH_SHIFT bits. */
#define U32_HASH_SHIFT 10
#define U32_HASH_SIZE (1 << U32_HASH_SHIFT)
325 
326 static void *tc_u_common_ptr(const struct tcf_proto *tp)
327 {
328 	struct tcf_block *block = tp->chain->block;
329 
330 	/* The block sharing is currently supported only
331 	 * for classless qdiscs. In that case we use block
332 	 * for tc_u_common identification. In case the
333 	 * block is not shared, block->q is a valid pointer
334 	 * and we can use that. That works for classful qdiscs.
335 	 */
336 	if (tcf_block_shared(block))
337 		return block;
338 	else
339 		return block->q;
340 }
341 
342 static struct hlist_head *tc_u_hash(void *key)
343 {
344 	return tc_u_common_hash + hash_ptr(key, U32_HASH_SHIFT);
345 }
346 
347 static struct tc_u_common *tc_u_common_find(void *key)
348 {
349 	struct tc_u_common *tc;
350 	hlist_for_each_entry(tc, tc_u_hash(key), hnode) {
351 		if (tc->ptr == key)
352 			return tc;
353 	}
354 	return NULL;
355 }
356 
357 static int u32_init(struct tcf_proto *tp)
358 {
359 	struct tc_u_hnode *root_ht;
360 	void *key = tc_u_common_ptr(tp);
361 	struct tc_u_common *tp_c = tc_u_common_find(key);
362 
363 	root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL);
364 	if (root_ht == NULL)
365 		return -ENOBUFS;
366 
367 	root_ht->refcnt++;
368 	root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : 0x80000000;
369 	root_ht->prio = tp->prio;
370 	root_ht->is_root = true;
371 	idr_init(&root_ht->handle_idr);
372 
373 	if (tp_c == NULL) {
374 		tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL);
375 		if (tp_c == NULL) {
376 			kfree(root_ht);
377 			return -ENOBUFS;
378 		}
379 		tp_c->ptr = key;
380 		INIT_HLIST_NODE(&tp_c->hnode);
381 		idr_init(&tp_c->handle_idr);
382 
383 		hlist_add_head(&tp_c->hnode, tc_u_hash(key));
384 	}
385 
386 	tp_c->refcnt++;
387 	RCU_INIT_POINTER(root_ht->next, tp_c->hlist);
388 	rcu_assign_pointer(tp_c->hlist, root_ht);
389 
390 	root_ht->refcnt++;
391 	rcu_assign_pointer(tp->root, root_ht);
392 	tp->data = tp_c;
393 	return 0;
394 }
395 
396 static int u32_destroy_key(struct tc_u_knode *n, bool free_pf)
397 {
398 	struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
399 
400 	tcf_exts_destroy(&n->exts);
401 	tcf_exts_put_net(&n->exts);
402 	if (ht && --ht->refcnt == 0)
403 		kfree(ht);
404 #ifdef CONFIG_CLS_U32_PERF
405 	if (free_pf)
406 		free_percpu(n->pf);
407 #endif
408 #ifdef CONFIG_CLS_U32_MARK
409 	if (free_pf)
410 		free_percpu(n->pcpu_success);
411 #endif
412 	kfree(n);
413 	return 0;
414 }
415 
416 /* u32_delete_key_rcu should be called when free'ing a copied
417  * version of a tc_u_knode obtained from u32_init_knode(). When
418  * copies are obtained from u32_init_knode() the statistics are
419  * shared between the old and new copies to allow readers to
420  * continue to update the statistics during the copy. To support
421  * this the u32_delete_key_rcu variant does not free the percpu
422  * statistics.
423  */
424 static void u32_delete_key_work(struct work_struct *work)
425 {
426 	struct tc_u_knode *key = container_of(to_rcu_work(work),
427 					      struct tc_u_knode,
428 					      rwork);
429 	rtnl_lock();
430 	u32_destroy_key(key, false);
431 	rtnl_unlock();
432 }
433 
434 /* u32_delete_key_freepf_rcu is the rcu callback variant
435  * that free's the entire structure including the statistics
436  * percpu variables. Only use this if the key is not a copy
437  * returned by u32_init_knode(). See u32_delete_key_rcu()
438  * for the variant that should be used with keys return from
439  * u32_init_knode()
440  */
441 static void u32_delete_key_freepf_work(struct work_struct *work)
442 {
443 	struct tc_u_knode *key = container_of(to_rcu_work(work),
444 					      struct tc_u_knode,
445 					      rwork);
446 	rtnl_lock();
447 	u32_destroy_key(key, true);
448 	rtnl_unlock();
449 }
450 
451 static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
452 {
453 	struct tc_u_common *tp_c = tp->data;
454 	struct tc_u_knode __rcu **kp;
455 	struct tc_u_knode *pkp;
456 	struct tc_u_hnode *ht = rtnl_dereference(key->ht_up);
457 
458 	if (ht) {
459 		kp = &ht->ht[TC_U32_HASH(key->handle)];
460 		for (pkp = rtnl_dereference(*kp); pkp;
461 		     kp = &pkp->next, pkp = rtnl_dereference(*kp)) {
462 			if (pkp == key) {
463 				RCU_INIT_POINTER(*kp, key->next);
464 				tp_c->knodes--;
465 
466 				tcf_unbind_filter(tp, &key->res);
467 				idr_remove(&ht->handle_idr, key->handle);
468 				tcf_exts_get_net(&key->exts);
469 				tcf_queue_work(&key->rwork, u32_delete_key_freepf_work);
470 				return 0;
471 			}
472 		}
473 	}
474 	WARN_ON(1);
475 	return 0;
476 }
477 
478 static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
479 			       struct netlink_ext_ack *extack)
480 {
481 	struct tcf_block *block = tp->chain->block;
482 	struct tc_cls_u32_offload cls_u32 = {};
483 
484 	tc_cls_common_offload_init(&cls_u32.common, tp, h->flags, extack);
485 	cls_u32.command = TC_CLSU32_DELETE_HNODE;
486 	cls_u32.hnode.divisor = h->divisor;
487 	cls_u32.hnode.handle = h->handle;
488 	cls_u32.hnode.prio = h->prio;
489 
490 	tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false);
491 }
492 
493 static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
494 				u32 flags, struct netlink_ext_ack *extack)
495 {
496 	struct tcf_block *block = tp->chain->block;
497 	struct tc_cls_u32_offload cls_u32 = {};
498 	bool skip_sw = tc_skip_sw(flags);
499 	bool offloaded = false;
500 	int err;
501 
502 	tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack);
503 	cls_u32.command = TC_CLSU32_NEW_HNODE;
504 	cls_u32.hnode.divisor = h->divisor;
505 	cls_u32.hnode.handle = h->handle;
506 	cls_u32.hnode.prio = h->prio;
507 
508 	err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw);
509 	if (err < 0) {
510 		u32_clear_hw_hnode(tp, h, NULL);
511 		return err;
512 	} else if (err > 0) {
513 		offloaded = true;
514 	}
515 
516 	if (skip_sw && !offloaded)
517 		return -EINVAL;
518 
519 	return 0;
520 }
521 
522 static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
523 				struct netlink_ext_ack *extack)
524 {
525 	struct tcf_block *block = tp->chain->block;
526 	struct tc_cls_u32_offload cls_u32 = {};
527 
528 	tc_cls_common_offload_init(&cls_u32.common, tp, n->flags, extack);
529 	cls_u32.command = TC_CLSU32_DELETE_KNODE;
530 	cls_u32.knode.handle = n->handle;
531 
532 	tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false);
533 	tcf_block_offload_dec(block, &n->flags);
534 }
535 
536 static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
537 				u32 flags, struct netlink_ext_ack *extack)
538 {
539 	struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
540 	struct tcf_block *block = tp->chain->block;
541 	struct tc_cls_u32_offload cls_u32 = {};
542 	bool skip_sw = tc_skip_sw(flags);
543 	int err;
544 
545 	tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack);
546 	cls_u32.command = TC_CLSU32_REPLACE_KNODE;
547 	cls_u32.knode.handle = n->handle;
548 	cls_u32.knode.fshift = n->fshift;
549 #ifdef CONFIG_CLS_U32_MARK
550 	cls_u32.knode.val = n->val;
551 	cls_u32.knode.mask = n->mask;
552 #else
553 	cls_u32.knode.val = 0;
554 	cls_u32.knode.mask = 0;
555 #endif
556 	cls_u32.knode.sel = &n->sel;
557 	cls_u32.knode.res = &n->res;
558 	cls_u32.knode.exts = &n->exts;
559 	if (n->ht_down)
560 		cls_u32.knode.link_handle = ht->handle;
561 
562 	err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw);
563 	if (err < 0) {
564 		u32_remove_hw_knode(tp, n, NULL);
565 		return err;
566 	} else if (err > 0) {
567 		n->in_hw_count = err;
568 		tcf_block_offload_inc(block, &n->flags);
569 	}
570 
571 	if (skip_sw && !(n->flags & TCA_CLS_FLAGS_IN_HW))
572 		return -EINVAL;
573 
574 	return 0;
575 }
576 
577 static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
578 			    struct netlink_ext_ack *extack)
579 {
580 	struct tc_u_common *tp_c = tp->data;
581 	struct tc_u_knode *n;
582 	unsigned int h;
583 
584 	for (h = 0; h <= ht->divisor; h++) {
585 		while ((n = rtnl_dereference(ht->ht[h])) != NULL) {
586 			RCU_INIT_POINTER(ht->ht[h],
587 					 rtnl_dereference(n->next));
588 			tp_c->knodes--;
589 			tcf_unbind_filter(tp, &n->res);
590 			u32_remove_hw_knode(tp, n, extack);
591 			idr_remove(&ht->handle_idr, n->handle);
592 			if (tcf_exts_get_net(&n->exts))
593 				tcf_queue_work(&n->rwork, u32_delete_key_freepf_work);
594 			else
595 				u32_destroy_key(n, true);
596 		}
597 	}
598 }
599 
600 static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
601 			     struct netlink_ext_ack *extack)
602 {
603 	struct tc_u_common *tp_c = tp->data;
604 	struct tc_u_hnode __rcu **hn;
605 	struct tc_u_hnode *phn;
606 
607 	WARN_ON(--ht->refcnt);
608 
609 	u32_clear_hnode(tp, ht, extack);
610 
611 	hn = &tp_c->hlist;
612 	for (phn = rtnl_dereference(*hn);
613 	     phn;
614 	     hn = &phn->next, phn = rtnl_dereference(*hn)) {
615 		if (phn == ht) {
616 			u32_clear_hw_hnode(tp, ht, extack);
617 			idr_destroy(&ht->handle_idr);
618 			idr_remove(&tp_c->handle_idr, ht->handle);
619 			RCU_INIT_POINTER(*hn, ht->next);
620 			kfree_rcu(ht, rcu);
621 			return 0;
622 		}
623 	}
624 
625 	return -ENOENT;
626 }
627 
628 static void u32_destroy(struct tcf_proto *tp, bool rtnl_held,
629 			struct netlink_ext_ack *extack)
630 {
631 	struct tc_u_common *tp_c = tp->data;
632 	struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
633 
634 	WARN_ON(root_ht == NULL);
635 
636 	if (root_ht && --root_ht->refcnt == 1)
637 		u32_destroy_hnode(tp, root_ht, extack);
638 
639 	if (--tp_c->refcnt == 0) {
640 		struct tc_u_hnode *ht;
641 
642 		hlist_del(&tp_c->hnode);
643 
644 		while ((ht = rtnl_dereference(tp_c->hlist)) != NULL) {
645 			u32_clear_hnode(tp, ht, extack);
646 			RCU_INIT_POINTER(tp_c->hlist, ht->next);
647 
648 			/* u32_destroy_key() will later free ht for us, if it's
649 			 * still referenced by some knode
650 			 */
651 			if (--ht->refcnt == 0)
652 				kfree_rcu(ht, rcu);
653 		}
654 
655 		idr_destroy(&tp_c->handle_idr);
656 		kfree(tp_c);
657 	}
658 
659 	tp->data = NULL;
660 }
661 
662 static int u32_delete(struct tcf_proto *tp, void *arg, bool *last,
663 		      bool rtnl_held, struct netlink_ext_ack *extack)
664 {
665 	struct tc_u_hnode *ht = arg;
666 	struct tc_u_common *tp_c = tp->data;
667 	int ret = 0;
668 
669 	if (TC_U32_KEY(ht->handle)) {
670 		u32_remove_hw_knode(tp, (struct tc_u_knode *)ht, extack);
671 		ret = u32_delete_key(tp, (struct tc_u_knode *)ht);
672 		goto out;
673 	}
674 
675 	if (ht->is_root) {
676 		NL_SET_ERR_MSG_MOD(extack, "Not allowed to delete root node");
677 		return -EINVAL;
678 	}
679 
680 	if (ht->refcnt == 1) {
681 		u32_destroy_hnode(tp, ht, extack);
682 	} else {
683 		NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter");
684 		return -EBUSY;
685 	}
686 
687 out:
688 	*last = tp_c->refcnt == 1 && tp_c->knodes == 0;
689 	return ret;
690 }
691 
692 static u32 gen_new_kid(struct tc_u_hnode *ht, u32 htid)
693 {
694 	u32 index = htid | 0x800;
695 	u32 max = htid | 0xFFF;
696 
697 	if (idr_alloc_u32(&ht->handle_idr, NULL, &index, max, GFP_KERNEL)) {
698 		index = htid + 1;
699 		if (idr_alloc_u32(&ht->handle_idr, NULL, &index, max,
700 				 GFP_KERNEL))
701 			index = max;
702 	}
703 
704 	return index;
705 }
706 
/* Netlink attribute policy used by u32_change() to parse TCA_OPTIONS.
 *
 * NOTE(review): for TCA_U32_SEL only the size of the fixed selector header is
 * given here; u32_change() checks the attribute length against the full
 * selector size (header plus keys) itself.
 */
static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
	[TCA_U32_CLASSID]	= { .type = NLA_U32 },
	[TCA_U32_HASH]		= { .type = NLA_U32 },
	[TCA_U32_LINK]		= { .type = NLA_U32 },
	[TCA_U32_DIVISOR]	= { .type = NLA_U32 },
	[TCA_U32_SEL]		= { .len = sizeof(struct tc_u32_sel) },
	[TCA_U32_INDEV]		= { .type = NLA_STRING, .len = IFNAMSIZ },
	[TCA_U32_MARK]		= { .len = sizeof(struct tc_u32_mark) },
	[TCA_U32_FLAGS]		= { .type = NLA_U32 },
};
717 
718 static int u32_set_parms(struct net *net, struct tcf_proto *tp,
719 			 unsigned long base,
720 			 struct tc_u_knode *n, struct nlattr **tb,
721 			 struct nlattr *est, bool ovr,
722 			 struct netlink_ext_ack *extack)
723 {
724 	int err;
725 
726 	err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, true, extack);
727 	if (err < 0)
728 		return err;
729 
730 	if (tb[TCA_U32_LINK]) {
731 		u32 handle = nla_get_u32(tb[TCA_U32_LINK]);
732 		struct tc_u_hnode *ht_down = NULL, *ht_old;
733 
734 		if (TC_U32_KEY(handle)) {
735 			NL_SET_ERR_MSG_MOD(extack, "u32 Link handle must be a hash table");
736 			return -EINVAL;
737 		}
738 
739 		if (handle) {
740 			ht_down = u32_lookup_ht(tp->data, handle);
741 
742 			if (!ht_down) {
743 				NL_SET_ERR_MSG_MOD(extack, "Link hash table not found");
744 				return -EINVAL;
745 			}
746 			if (ht_down->is_root) {
747 				NL_SET_ERR_MSG_MOD(extack, "Not linking to root node");
748 				return -EINVAL;
749 			}
750 			ht_down->refcnt++;
751 		}
752 
753 		ht_old = rtnl_dereference(n->ht_down);
754 		rcu_assign_pointer(n->ht_down, ht_down);
755 
756 		if (ht_old)
757 			ht_old->refcnt--;
758 	}
759 	if (tb[TCA_U32_CLASSID]) {
760 		n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]);
761 		tcf_bind_filter(tp, &n->res, base);
762 	}
763 
764 #ifdef CONFIG_NET_CLS_IND
765 	if (tb[TCA_U32_INDEV]) {
766 		int ret;
767 		ret = tcf_change_indev(net, tb[TCA_U32_INDEV], extack);
768 		if (ret < 0)
769 			return -EINVAL;
770 		n->ifindex = ret;
771 	}
772 #endif
773 	return 0;
774 }
775 
776 static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c,
777 			      struct tc_u_knode *n)
778 {
779 	struct tc_u_knode __rcu **ins;
780 	struct tc_u_knode *pins;
781 	struct tc_u_hnode *ht;
782 
783 	if (TC_U32_HTID(n->handle) == TC_U32_ROOT)
784 		ht = rtnl_dereference(tp->root);
785 	else
786 		ht = u32_lookup_ht(tp_c, TC_U32_HTID(n->handle));
787 
788 	ins = &ht->ht[TC_U32_HASH(n->handle)];
789 
790 	/* The node must always exist for it to be replaced if this is not the
791 	 * case then something went very wrong elsewhere.
792 	 */
793 	for (pins = rtnl_dereference(*ins); ;
794 	     ins = &pins->next, pins = rtnl_dereference(*ins))
795 		if (pins->handle == n->handle)
796 			break;
797 
798 	idr_replace(&ht->handle_idr, n, n->handle);
799 	RCU_INIT_POINTER(n->next, pins->next);
800 	rcu_assign_pointer(*ins, n);
801 }
802 
803 static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
804 					 struct tc_u_knode *n)
805 {
806 	struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
807 	struct tc_u32_sel *s = &n->sel;
808 	struct tc_u_knode *new;
809 
810 	new = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key),
811 		      GFP_KERNEL);
812 
813 	if (!new)
814 		return NULL;
815 
816 	RCU_INIT_POINTER(new->next, n->next);
817 	new->handle = n->handle;
818 	RCU_INIT_POINTER(new->ht_up, n->ht_up);
819 
820 #ifdef CONFIG_NET_CLS_IND
821 	new->ifindex = n->ifindex;
822 #endif
823 	new->fshift = n->fshift;
824 	new->res = n->res;
825 	new->flags = n->flags;
826 	RCU_INIT_POINTER(new->ht_down, ht);
827 
828 	/* bump reference count as long as we hold pointer to structure */
829 	if (ht)
830 		ht->refcnt++;
831 
832 #ifdef CONFIG_CLS_U32_PERF
833 	/* Statistics may be incremented by readers during update
834 	 * so we must keep them in tact. When the node is later destroyed
835 	 * a special destroy call must be made to not free the pf memory.
836 	 */
837 	new->pf = n->pf;
838 #endif
839 
840 #ifdef CONFIG_CLS_U32_MARK
841 	new->val = n->val;
842 	new->mask = n->mask;
843 	/* Similarly success statistics must be moved as pointers */
844 	new->pcpu_success = n->pcpu_success;
845 #endif
846 	memcpy(&new->sel, s, struct_size(s, keys, s->nkeys));
847 
848 	if (tcf_exts_init(&new->exts, net, TCA_U32_ACT, TCA_U32_POLICE)) {
849 		kfree(new);
850 		return NULL;
851 	}
852 
853 	return new;
854 }
855 
856 static int u32_change(struct net *net, struct sk_buff *in_skb,
857 		      struct tcf_proto *tp, unsigned long base, u32 handle,
858 		      struct nlattr **tca, void **arg, bool ovr, bool rtnl_held,
859 		      struct netlink_ext_ack *extack)
860 {
861 	struct tc_u_common *tp_c = tp->data;
862 	struct tc_u_hnode *ht;
863 	struct tc_u_knode *n;
864 	struct tc_u32_sel *s;
865 	struct nlattr *opt = tca[TCA_OPTIONS];
866 	struct nlattr *tb[TCA_U32_MAX + 1];
867 	u32 htid, flags = 0;
868 	size_t sel_size;
869 	int err;
870 #ifdef CONFIG_CLS_U32_PERF
871 	size_t size;
872 #endif
873 
874 	if (!opt) {
875 		if (handle) {
876 			NL_SET_ERR_MSG_MOD(extack, "Filter handle requires options");
877 			return -EINVAL;
878 		} else {
879 			return 0;
880 		}
881 	}
882 
883 	err = nla_parse_nested_deprecated(tb, TCA_U32_MAX, opt, u32_policy,
884 					  extack);
885 	if (err < 0)
886 		return err;
887 
888 	if (tb[TCA_U32_FLAGS]) {
889 		flags = nla_get_u32(tb[TCA_U32_FLAGS]);
890 		if (!tc_flags_valid(flags)) {
891 			NL_SET_ERR_MSG_MOD(extack, "Invalid filter flags");
892 			return -EINVAL;
893 		}
894 	}
895 
896 	n = *arg;
897 	if (n) {
898 		struct tc_u_knode *new;
899 
900 		if (TC_U32_KEY(n->handle) == 0) {
901 			NL_SET_ERR_MSG_MOD(extack, "Key node id cannot be zero");
902 			return -EINVAL;
903 		}
904 
905 		if ((n->flags ^ flags) &
906 		    ~(TCA_CLS_FLAGS_IN_HW | TCA_CLS_FLAGS_NOT_IN_HW)) {
907 			NL_SET_ERR_MSG_MOD(extack, "Key node flags do not match passed flags");
908 			return -EINVAL;
909 		}
910 
911 		new = u32_init_knode(net, tp, n);
912 		if (!new)
913 			return -ENOMEM;
914 
915 		err = u32_set_parms(net, tp, base, new, tb,
916 				    tca[TCA_RATE], ovr, extack);
917 
918 		if (err) {
919 			u32_destroy_key(new, false);
920 			return err;
921 		}
922 
923 		err = u32_replace_hw_knode(tp, new, flags, extack);
924 		if (err) {
925 			u32_destroy_key(new, false);
926 			return err;
927 		}
928 
929 		if (!tc_in_hw(new->flags))
930 			new->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
931 
932 		u32_replace_knode(tp, tp_c, new);
933 		tcf_unbind_filter(tp, &n->res);
934 		tcf_exts_get_net(&n->exts);
935 		tcf_queue_work(&n->rwork, u32_delete_key_work);
936 		return 0;
937 	}
938 
939 	if (tb[TCA_U32_DIVISOR]) {
940 		unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);
941 
942 		if (!is_power_of_2(divisor)) {
943 			NL_SET_ERR_MSG_MOD(extack, "Divisor is not a power of 2");
944 			return -EINVAL;
945 		}
946 		if (divisor-- > 0x100) {
947 			NL_SET_ERR_MSG_MOD(extack, "Exceeded maximum 256 hash buckets");
948 			return -EINVAL;
949 		}
950 		if (TC_U32_KEY(handle)) {
951 			NL_SET_ERR_MSG_MOD(extack, "Divisor can only be used on a hash table");
952 			return -EINVAL;
953 		}
954 		ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL);
955 		if (ht == NULL)
956 			return -ENOBUFS;
957 		if (handle == 0) {
958 			handle = gen_new_htid(tp->data, ht);
959 			if (handle == 0) {
960 				kfree(ht);
961 				return -ENOMEM;
962 			}
963 		} else {
964 			err = idr_alloc_u32(&tp_c->handle_idr, ht, &handle,
965 					    handle, GFP_KERNEL);
966 			if (err) {
967 				kfree(ht);
968 				return err;
969 			}
970 		}
971 		ht->refcnt = 1;
972 		ht->divisor = divisor;
973 		ht->handle = handle;
974 		ht->prio = tp->prio;
975 		idr_init(&ht->handle_idr);
976 		ht->flags = flags;
977 
978 		err = u32_replace_hw_hnode(tp, ht, flags, extack);
979 		if (err) {
980 			idr_remove(&tp_c->handle_idr, handle);
981 			kfree(ht);
982 			return err;
983 		}
984 
985 		RCU_INIT_POINTER(ht->next, tp_c->hlist);
986 		rcu_assign_pointer(tp_c->hlist, ht);
987 		*arg = ht;
988 
989 		return 0;
990 	}
991 
992 	if (tb[TCA_U32_HASH]) {
993 		htid = nla_get_u32(tb[TCA_U32_HASH]);
994 		if (TC_U32_HTID(htid) == TC_U32_ROOT) {
995 			ht = rtnl_dereference(tp->root);
996 			htid = ht->handle;
997 		} else {
998 			ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid));
999 			if (!ht) {
1000 				NL_SET_ERR_MSG_MOD(extack, "Specified hash table not found");
1001 				return -EINVAL;
1002 			}
1003 		}
1004 	} else {
1005 		ht = rtnl_dereference(tp->root);
1006 		htid = ht->handle;
1007 	}
1008 
1009 	if (ht->divisor < TC_U32_HASH(htid)) {
1010 		NL_SET_ERR_MSG_MOD(extack, "Specified hash table buckets exceed configured value");
1011 		return -EINVAL;
1012 	}
1013 
1014 	if (handle) {
1015 		if (TC_U32_HTID(handle) && TC_U32_HTID(handle ^ htid)) {
1016 			NL_SET_ERR_MSG_MOD(extack, "Handle specified hash table address mismatch");
1017 			return -EINVAL;
1018 		}
1019 		handle = htid | TC_U32_NODE(handle);
1020 		err = idr_alloc_u32(&ht->handle_idr, NULL, &handle, handle,
1021 				    GFP_KERNEL);
1022 		if (err)
1023 			return err;
1024 	} else
1025 		handle = gen_new_kid(ht, htid);
1026 
1027 	if (tb[TCA_U32_SEL] == NULL) {
1028 		NL_SET_ERR_MSG_MOD(extack, "Selector not specified");
1029 		err = -EINVAL;
1030 		goto erridr;
1031 	}
1032 
1033 	s = nla_data(tb[TCA_U32_SEL]);
1034 	sel_size = struct_size(s, keys, s->nkeys);
1035 	if (nla_len(tb[TCA_U32_SEL]) < sel_size) {
1036 		err = -EINVAL;
1037 		goto erridr;
1038 	}
1039 
1040 	n = kzalloc(offsetof(typeof(*n), sel) + sel_size, GFP_KERNEL);
1041 	if (n == NULL) {
1042 		err = -ENOBUFS;
1043 		goto erridr;
1044 	}
1045 
1046 #ifdef CONFIG_CLS_U32_PERF
1047 	size = sizeof(struct tc_u32_pcnt) + s->nkeys * sizeof(u64);
1048 	n->pf = __alloc_percpu(size, __alignof__(struct tc_u32_pcnt));
1049 	if (!n->pf) {
1050 		err = -ENOBUFS;
1051 		goto errfree;
1052 	}
1053 #endif
1054 
1055 	memcpy(&n->sel, s, sel_size);
1056 	RCU_INIT_POINTER(n->ht_up, ht);
1057 	n->handle = handle;
1058 	n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
1059 	n->flags = flags;
1060 
1061 	err = tcf_exts_init(&n->exts, net, TCA_U32_ACT, TCA_U32_POLICE);
1062 	if (err < 0)
1063 		goto errout;
1064 
1065 #ifdef CONFIG_CLS_U32_MARK
1066 	n->pcpu_success = alloc_percpu(u32);
1067 	if (!n->pcpu_success) {
1068 		err = -ENOMEM;
1069 		goto errout;
1070 	}
1071 
1072 	if (tb[TCA_U32_MARK]) {
1073 		struct tc_u32_mark *mark;
1074 
1075 		mark = nla_data(tb[TCA_U32_MARK]);
1076 		n->val = mark->val;
1077 		n->mask = mark->mask;
1078 	}
1079 #endif
1080 
1081 	err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE], ovr,
1082 			    extack);
1083 	if (err == 0) {
1084 		struct tc_u_knode __rcu **ins;
1085 		struct tc_u_knode *pins;
1086 
1087 		err = u32_replace_hw_knode(tp, n, flags, extack);
1088 		if (err)
1089 			goto errhw;
1090 
1091 		if (!tc_in_hw(n->flags))
1092 			n->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
1093 
1094 		ins = &ht->ht[TC_U32_HASH(handle)];
1095 		for (pins = rtnl_dereference(*ins); pins;
1096 		     ins = &pins->next, pins = rtnl_dereference(*ins))
1097 			if (TC_U32_NODE(handle) < TC_U32_NODE(pins->handle))
1098 				break;
1099 
1100 		RCU_INIT_POINTER(n->next, pins);
1101 		rcu_assign_pointer(*ins, n);
1102 		tp_c->knodes++;
1103 		*arg = n;
1104 		return 0;
1105 	}
1106 
1107 errhw:
1108 #ifdef CONFIG_CLS_U32_MARK
1109 	free_percpu(n->pcpu_success);
1110 #endif
1111 
1112 errout:
1113 	tcf_exts_destroy(&n->exts);
1114 #ifdef CONFIG_CLS_U32_PERF
1115 errfree:
1116 	free_percpu(n->pf);
1117 #endif
1118 	kfree(n);
1119 erridr:
1120 	idr_remove(&ht->handle_idr, handle);
1121 	return err;
1122 }
1123 
1124 static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg,
1125 		     bool rtnl_held)
1126 {
1127 	struct tc_u_common *tp_c = tp->data;
1128 	struct tc_u_hnode *ht;
1129 	struct tc_u_knode *n;
1130 	unsigned int h;
1131 
1132 	if (arg->stop)
1133 		return;
1134 
1135 	for (ht = rtnl_dereference(tp_c->hlist);
1136 	     ht;
1137 	     ht = rtnl_dereference(ht->next)) {
1138 		if (ht->prio != tp->prio)
1139 			continue;
1140 		if (arg->count >= arg->skip) {
1141 			if (arg->fn(tp, ht, arg) < 0) {
1142 				arg->stop = 1;
1143 				return;
1144 			}
1145 		}
1146 		arg->count++;
1147 		for (h = 0; h <= ht->divisor; h++) {
1148 			for (n = rtnl_dereference(ht->ht[h]);
1149 			     n;
1150 			     n = rtnl_dereference(n->next)) {
1151 				if (arg->count < arg->skip) {
1152 					arg->count++;
1153 					continue;
1154 				}
1155 				if (arg->fn(tp, n, arg) < 0) {
1156 					arg->stop = 1;
1157 					return;
1158 				}
1159 				arg->count++;
1160 			}
1161 		}
1162 	}
1163 }
1164 
1165 static int u32_reoffload_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
1166 			       bool add, tc_setup_cb_t *cb, void *cb_priv,
1167 			       struct netlink_ext_ack *extack)
1168 {
1169 	struct tc_cls_u32_offload cls_u32 = {};
1170 	int err;
1171 
1172 	tc_cls_common_offload_init(&cls_u32.common, tp, ht->flags, extack);
1173 	cls_u32.command = add ? TC_CLSU32_NEW_HNODE : TC_CLSU32_DELETE_HNODE;
1174 	cls_u32.hnode.divisor = ht->divisor;
1175 	cls_u32.hnode.handle = ht->handle;
1176 	cls_u32.hnode.prio = ht->prio;
1177 
1178 	err = cb(TC_SETUP_CLSU32, &cls_u32, cb_priv);
1179 	if (err && add && tc_skip_sw(ht->flags))
1180 		return err;
1181 
1182 	return 0;
1183 }
1184 
1185 static int u32_reoffload_knode(struct tcf_proto *tp, struct tc_u_knode *n,
1186 			       bool add, tc_setup_cb_t *cb, void *cb_priv,
1187 			       struct netlink_ext_ack *extack)
1188 {
1189 	struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
1190 	struct tcf_block *block = tp->chain->block;
1191 	struct tc_cls_u32_offload cls_u32 = {};
1192 	int err;
1193 
1194 	tc_cls_common_offload_init(&cls_u32.common, tp, n->flags, extack);
1195 	cls_u32.command = add ?
1196 		TC_CLSU32_REPLACE_KNODE : TC_CLSU32_DELETE_KNODE;
1197 	cls_u32.knode.handle = n->handle;
1198 
1199 	if (add) {
1200 		cls_u32.knode.fshift = n->fshift;
1201 #ifdef CONFIG_CLS_U32_MARK
1202 		cls_u32.knode.val = n->val;
1203 		cls_u32.knode.mask = n->mask;
1204 #else
1205 		cls_u32.knode.val = 0;
1206 		cls_u32.knode.mask = 0;
1207 #endif
1208 		cls_u32.knode.sel = &n->sel;
1209 		cls_u32.knode.res = &n->res;
1210 		cls_u32.knode.exts = &n->exts;
1211 		if (n->ht_down)
1212 			cls_u32.knode.link_handle = ht->handle;
1213 	}
1214 
1215 	err = cb(TC_SETUP_CLSU32, &cls_u32, cb_priv);
1216 	if (err) {
1217 		if (add && tc_skip_sw(n->flags))
1218 			return err;
1219 		return 0;
1220 	}
1221 
1222 	tc_cls_offload_cnt_update(block, &n->in_hw_count, &n->flags, add);
1223 
1224 	return 0;
1225 }
1226 
1227 static int u32_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
1228 			 void *cb_priv, struct netlink_ext_ack *extack)
1229 {
1230 	struct tc_u_common *tp_c = tp->data;
1231 	struct tc_u_hnode *ht;
1232 	struct tc_u_knode *n;
1233 	unsigned int h;
1234 	int err;
1235 
1236 	for (ht = rtnl_dereference(tp_c->hlist);
1237 	     ht;
1238 	     ht = rtnl_dereference(ht->next)) {
1239 		if (ht->prio != tp->prio)
1240 			continue;
1241 
1242 		/* When adding filters to a new dev, try to offload the
1243 		 * hashtable first. When removing, do the filters before the
1244 		 * hashtable.
1245 		 */
1246 		if (add && !tc_skip_hw(ht->flags)) {
1247 			err = u32_reoffload_hnode(tp, ht, add, cb, cb_priv,
1248 						  extack);
1249 			if (err)
1250 				return err;
1251 		}
1252 
1253 		for (h = 0; h <= ht->divisor; h++) {
1254 			for (n = rtnl_dereference(ht->ht[h]);
1255 			     n;
1256 			     n = rtnl_dereference(n->next)) {
1257 				if (tc_skip_hw(n->flags))
1258 					continue;
1259 
1260 				err = u32_reoffload_knode(tp, n, add, cb,
1261 							  cb_priv, extack);
1262 				if (err)
1263 					return err;
1264 			}
1265 		}
1266 
1267 		if (!add && !tc_skip_hw(ht->flags))
1268 			u32_reoffload_hnode(tp, ht, add, cb, cb_priv, extack);
1269 	}
1270 
1271 	return 0;
1272 }
1273 
1274 static void u32_bind_class(void *fh, u32 classid, unsigned long cl)
1275 {
1276 	struct tc_u_knode *n = fh;
1277 
1278 	if (n && n->res.classid == classid)
1279 		n->res.class = cl;
1280 }
1281 
1282 static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh,
1283 		    struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
1284 {
1285 	struct tc_u_knode *n = fh;
1286 	struct tc_u_hnode *ht_up, *ht_down;
1287 	struct nlattr *nest;
1288 
1289 	if (n == NULL)
1290 		return skb->len;
1291 
1292 	t->tcm_handle = n->handle;
1293 
1294 	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1295 	if (nest == NULL)
1296 		goto nla_put_failure;
1297 
1298 	if (TC_U32_KEY(n->handle) == 0) {
1299 		struct tc_u_hnode *ht = fh;
1300 		u32 divisor = ht->divisor + 1;
1301 
1302 		if (nla_put_u32(skb, TCA_U32_DIVISOR, divisor))
1303 			goto nla_put_failure;
1304 	} else {
1305 #ifdef CONFIG_CLS_U32_PERF
1306 		struct tc_u32_pcnt *gpf;
1307 		int cpu;
1308 #endif
1309 
1310 		if (nla_put(skb, TCA_U32_SEL,
1311 			    sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key),
1312 			    &n->sel))
1313 			goto nla_put_failure;
1314 
1315 		ht_up = rtnl_dereference(n->ht_up);
1316 		if (ht_up) {
1317 			u32 htid = n->handle & 0xFFFFF000;
1318 			if (nla_put_u32(skb, TCA_U32_HASH, htid))
1319 				goto nla_put_failure;
1320 		}
1321 		if (n->res.classid &&
1322 		    nla_put_u32(skb, TCA_U32_CLASSID, n->res.classid))
1323 			goto nla_put_failure;
1324 
1325 		ht_down = rtnl_dereference(n->ht_down);
1326 		if (ht_down &&
1327 		    nla_put_u32(skb, TCA_U32_LINK, ht_down->handle))
1328 			goto nla_put_failure;
1329 
1330 		if (n->flags && nla_put_u32(skb, TCA_U32_FLAGS, n->flags))
1331 			goto nla_put_failure;
1332 
1333 #ifdef CONFIG_CLS_U32_MARK
1334 		if ((n->val || n->mask)) {
1335 			struct tc_u32_mark mark = {.val = n->val,
1336 						   .mask = n->mask,
1337 						   .success = 0};
1338 			int cpum;
1339 
1340 			for_each_possible_cpu(cpum) {
1341 				__u32 cnt = *per_cpu_ptr(n->pcpu_success, cpum);
1342 
1343 				mark.success += cnt;
1344 			}
1345 
1346 			if (nla_put(skb, TCA_U32_MARK, sizeof(mark), &mark))
1347 				goto nla_put_failure;
1348 		}
1349 #endif
1350 
1351 		if (tcf_exts_dump(skb, &n->exts) < 0)
1352 			goto nla_put_failure;
1353 
1354 #ifdef CONFIG_NET_CLS_IND
1355 		if (n->ifindex) {
1356 			struct net_device *dev;
1357 			dev = __dev_get_by_index(net, n->ifindex);
1358 			if (dev && nla_put_string(skb, TCA_U32_INDEV, dev->name))
1359 				goto nla_put_failure;
1360 		}
1361 #endif
1362 #ifdef CONFIG_CLS_U32_PERF
1363 		gpf = kzalloc(sizeof(struct tc_u32_pcnt) +
1364 			      n->sel.nkeys * sizeof(u64),
1365 			      GFP_KERNEL);
1366 		if (!gpf)
1367 			goto nla_put_failure;
1368 
1369 		for_each_possible_cpu(cpu) {
1370 			int i;
1371 			struct tc_u32_pcnt *pf = per_cpu_ptr(n->pf, cpu);
1372 
1373 			gpf->rcnt += pf->rcnt;
1374 			gpf->rhit += pf->rhit;
1375 			for (i = 0; i < n->sel.nkeys; i++)
1376 				gpf->kcnts[i] += pf->kcnts[i];
1377 		}
1378 
1379 		if (nla_put_64bit(skb, TCA_U32_PCNT,
1380 				  sizeof(struct tc_u32_pcnt) +
1381 				  n->sel.nkeys * sizeof(u64),
1382 				  gpf, TCA_U32_PAD)) {
1383 			kfree(gpf);
1384 			goto nla_put_failure;
1385 		}
1386 		kfree(gpf);
1387 #endif
1388 	}
1389 
1390 	nla_nest_end(skb, nest);
1391 
1392 	if (TC_U32_KEY(n->handle))
1393 		if (tcf_exts_dump_stats(skb, &n->exts) < 0)
1394 			goto nla_put_failure;
1395 	return skb->len;
1396 
1397 nla_put_failure:
1398 	nla_nest_cancel(skb, nest);
1399 	return -1;
1400 }
1401 
1402 static struct tcf_proto_ops cls_u32_ops __read_mostly = {
1403 	.kind		=	"u32",
1404 	.classify	=	u32_classify,
1405 	.init		=	u32_init,
1406 	.destroy	=	u32_destroy,
1407 	.get		=	u32_get,
1408 	.change		=	u32_change,
1409 	.delete		=	u32_delete,
1410 	.walk		=	u32_walk,
1411 	.reoffload	=	u32_reoffload,
1412 	.dump		=	u32_dump,
1413 	.bind_class	=	u32_bind_class,
1414 	.owner		=	THIS_MODULE,
1415 };
1416 
1417 static int __init init_u32(void)
1418 {
1419 	int i, ret;
1420 
1421 	pr_info("u32 classifier\n");
1422 #ifdef CONFIG_CLS_U32_PERF
1423 	pr_info("    Performance counters on\n");
1424 #endif
1425 #ifdef CONFIG_NET_CLS_IND
1426 	pr_info("    input device check on\n");
1427 #endif
1428 #ifdef CONFIG_NET_CLS_ACT
1429 	pr_info("    Actions configured\n");
1430 #endif
1431 	tc_u_common_hash = kvmalloc_array(U32_HASH_SIZE,
1432 					  sizeof(struct hlist_head),
1433 					  GFP_KERNEL);
1434 	if (!tc_u_common_hash)
1435 		return -ENOMEM;
1436 
1437 	for (i = 0; i < U32_HASH_SIZE; i++)
1438 		INIT_HLIST_HEAD(&tc_u_common_hash[i]);
1439 
1440 	ret = register_tcf_proto_ops(&cls_u32_ops);
1441 	if (ret)
1442 		kvfree(tc_u_common_hash);
1443 	return ret;
1444 }
1445 
1446 static void __exit exit_u32(void)
1447 {
1448 	unregister_tcf_proto_ops(&cls_u32_ops);
1449 	kvfree(tc_u_common_hash);
1450 }
1451 
1452 module_init(init_u32)
1453 module_exit(exit_u32)
1454 MODULE_LICENSE("GPL");
1455