/* xref: /openbmc/linux/net/sched/cls_flower.c (revision f0702555) */
/*
 * net/sched/cls_flower.c		Flower classifier
 *
 * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */
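
/*
 * Example usage from userspace (illustrative only; assumes a device named
 * "eth0" and an iproute2 tc binary built with flower support):
 *
 *	tc qdisc add dev eth0 ingress
 *	tc filter add dev eth0 parent ffff: protocol ip flower \
 *		ip_proto tcp dst_port 80 action drop
 *
 * This matches TCP packets with destination port 80 on ingress and drops
 * them.
 */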

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/rhashtable.h>

#include <linux/if_ether.h>
#include <linux/in6.h>
#include <linux/ip.h>

#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <net/ip.h>
#include <net/flow_dissector.h>

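/* Classification key. The layout mirrors the flow dissector key structs so
 * that skb_flow_dissect() can fill it directly; masked copies of this struct
 * serve as the rhashtable keys used for filter lookup.
 */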
struct fl_flow_key {
	int	indev_ifindex;
	struct flow_dissector_key_control control;
	struct flow_dissector_key_basic basic;
	struct flow_dissector_key_eth_addrs eth;
	struct flow_dissector_key_addrs ipaddrs;
	union {
		struct flow_dissector_key_ipv4_addrs ipv4;
		struct flow_dissector_key_ipv6_addrs ipv6;
	};
	struct flow_dissector_key_ports tp;
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */

struct fl_flow_mask_range {
	unsigned short int start;
	unsigned short int end;
};

struct fl_flow_mask {
	struct fl_flow_key key;
	struct fl_flow_mask_range range;
	struct rcu_head	rcu;
};

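/* Per-classifier state. All filters share a single mask (and thus a single
 * rhashtable key layout); the flow dissector is set up to extract only the
 * key members covered by that mask.
 */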
struct cls_fl_head {
	struct rhashtable ht;
	struct fl_flow_mask mask;
	struct flow_dissector dissector;
	u32 hgen;
	bool mask_assigned;
	struct list_head filters;
	struct rhashtable_params ht_params;
	struct rcu_head rcu;
};

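/* A single filter instance. @mkey is @key AND-ed with the shared mask and is
 * what the rhashtable hashes and compares on lookup.
 */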
struct cls_fl_filter {
	struct rhash_head ht_node;
	struct fl_flow_key mkey;
	struct tcf_exts exts;
	struct tcf_result res;
	struct fl_flow_key key;
	struct list_head list;
	u32 handle;
	struct rcu_head	rcu;
};

static unsigned short int fl_mask_range(const struct fl_flow_mask *mask)
{
	return mask->range.end - mask->range.start;
}

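/* Find the byte range of the mask that contains non-zero bytes and round it
 * out to long boundaries, so that masking, hashing and comparisons only have
 * to walk the relevant part of the key, one long at a time.
 */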
static void fl_mask_update_range(struct fl_flow_mask *mask)
{
	const u8 *bytes = (const u8 *) &mask->key;
	size_t size = sizeof(mask->key);
	size_t i, first = 0, last = size - 1;

	for (i = 0; i < sizeof(mask->key); i++) {
		if (bytes[i]) {
			if (!first && i)
				first = i;
			last = i;
		}
	}
	mask->range.start = rounddown(first, sizeof(long));
	mask->range.end = roundup(last + 1, sizeof(long));
}

static void *fl_key_get_start(struct fl_flow_key *key,
			      const struct fl_flow_mask *mask)
{
	return (u8 *) key + mask->range.start;
}

static void fl_set_masked_key(struct fl_flow_key *mkey, struct fl_flow_key *key,
			      struct fl_flow_mask *mask)
{
	const long *lkey = fl_key_get_start(key, mask);
	const long *lmask = fl_key_get_start(&mask->key, mask);
	long *lmkey = fl_key_get_start(mkey, mask);
	int i;

	for (i = 0; i < fl_mask_range(mask); i += sizeof(long))
		*lmkey++ = *lkey++ & *lmask++;
}

static void fl_clear_masked_range(struct fl_flow_key *key,
				  struct fl_flow_mask *mask)
{
	memset(fl_key_get_start(key, mask), 0, fl_mask_range(mask));
}

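/* Fast path: dissect the skb into a local key, mask it with the classifier's
 * single mask and look the result up in the rhashtable. On a hit, the
 * matching filter's actions are executed.
 */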
static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
		       struct tcf_result *res)
{
	struct cls_fl_head *head = rcu_dereference_bh(tp->root);
	struct cls_fl_filter *f;
	struct fl_flow_key skb_key;
	struct fl_flow_key skb_mkey;

	fl_clear_masked_range(&skb_key, &head->mask);
	skb_key.indev_ifindex = skb->skb_iif;
	/* skb_flow_dissect() does not set n_proto in case of an unknown
	 * protocol, so set it here.
	 */
	skb_key.basic.n_proto = skb->protocol;
	skb_flow_dissect(skb, &head->dissector, &skb_key, 0);

	fl_set_masked_key(&skb_mkey, &skb_key, &head->mask);

	f = rhashtable_lookup_fast(&head->ht,
				   fl_key_get_start(&skb_mkey, &head->mask),
				   head->ht_params);
	if (f) {
		*res = f->res;
		return tcf_exts_exec(skb, &f->exts, res);
	}
	return -1;
}

static int fl_init(struct tcf_proto *tp)
{
	struct cls_fl_head *head;

	head = kzalloc(sizeof(*head), GFP_KERNEL);
	if (!head)
		return -ENOBUFS;

	INIT_LIST_HEAD_RCU(&head->filters);
	rcu_assign_pointer(tp->root, head);

	return 0;
}

static void fl_destroy_filter(struct rcu_head *head)
{
	struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu);

	tcf_exts_destroy(&f->exts);
	kfree(f);
}

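/* Hardware offload helpers. Each one builds a struct tc_cls_flower_offload
 * command and hands it to the device's ndo_setup_tc() callback; the filter
 * pointer doubles as an opaque cookie identifying the rule in hardware.
 */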
static void fl_hw_destroy_filter(struct tcf_proto *tp, unsigned long cookie)
{
	struct net_device *dev = tp->q->dev_queue->dev;
	struct tc_cls_flower_offload offload = {0};
	struct tc_to_netdev tc;

	if (!tc_should_offload(dev, tp, 0))
		return;

	offload.command = TC_CLSFLOWER_DESTROY;
	offload.cookie = cookie;

	tc.type = TC_SETUP_CLSFLOWER;
	tc.cls_flower = &offload;

	dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
}

static void fl_hw_replace_filter(struct tcf_proto *tp,
				 struct flow_dissector *dissector,
				 struct fl_flow_key *mask,
				 struct fl_flow_key *key,
				 struct tcf_exts *actions,
				 unsigned long cookie, u32 flags)
{
	struct net_device *dev = tp->q->dev_queue->dev;
	struct tc_cls_flower_offload offload = {0};
	struct tc_to_netdev tc;

	if (!tc_should_offload(dev, tp, flags))
		return;

	offload.command = TC_CLSFLOWER_REPLACE;
	offload.cookie = cookie;
	offload.dissector = dissector;
	offload.mask = mask;
	offload.key = key;
	offload.exts = actions;

	tc.type = TC_SETUP_CLSFLOWER;
	tc.cls_flower = &offload;

	dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
}

static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
{
	struct net_device *dev = tp->q->dev_queue->dev;
	struct tc_cls_flower_offload offload = {0};
	struct tc_to_netdev tc;

	if (!tc_should_offload(dev, tp, 0))
		return;

	offload.command = TC_CLSFLOWER_STATS;
	offload.cookie = (unsigned long)f;
	offload.exts = &f->exts;

	tc.type = TC_SETUP_CLSFLOWER;
	tc.cls_flower = &offload;

	dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
}

static bool fl_destroy(struct tcf_proto *tp, bool force)
{
	struct cls_fl_head *head = rtnl_dereference(tp->root);
	struct cls_fl_filter *f, *next;

	if (!force && !list_empty(&head->filters))
		return false;

	list_for_each_entry_safe(f, next, &head->filters, list) {
		fl_hw_destroy_filter(tp, (unsigned long)f);
		list_del_rcu(&f->list);
		call_rcu(&f->rcu, fl_destroy_filter);
	}
	RCU_INIT_POINTER(tp->root, NULL);
	if (head->mask_assigned)
		rhashtable_destroy(&head->ht);
	kfree_rcu(head, rcu);
	return true;
}

static unsigned long fl_get(struct tcf_proto *tp, u32 handle)
{
	struct cls_fl_head *head = rtnl_dereference(tp->root);
	struct cls_fl_filter *f;

	list_for_each_entry(f, &head->filters, list)
		if (f->handle == handle)
			return (unsigned long) f;
	return 0;
}

static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
	[TCA_FLOWER_UNSPEC]		= { .type = NLA_UNSPEC },
	[TCA_FLOWER_CLASSID]		= { .type = NLA_U32 },
	[TCA_FLOWER_INDEV]		= { .type = NLA_STRING,
					    .len = IFNAMSIZ },
	[TCA_FLOWER_KEY_ETH_DST]	= { .len = ETH_ALEN },
	[TCA_FLOWER_KEY_ETH_DST_MASK]	= { .len = ETH_ALEN },
	[TCA_FLOWER_KEY_ETH_SRC]	= { .len = ETH_ALEN },
	[TCA_FLOWER_KEY_ETH_SRC_MASK]	= { .len = ETH_ALEN },
	[TCA_FLOWER_KEY_ETH_TYPE]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_IP_PROTO]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_IPV4_SRC]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_IPV4_SRC_MASK]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_IPV4_DST]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_IPV4_DST_MASK]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_IPV6_SRC]	= { .len = sizeof(struct in6_addr) },
	[TCA_FLOWER_KEY_IPV6_SRC_MASK]	= { .len = sizeof(struct in6_addr) },
	[TCA_FLOWER_KEY_IPV6_DST]	= { .len = sizeof(struct in6_addr) },
	[TCA_FLOWER_KEY_IPV6_DST_MASK]	= { .len = sizeof(struct in6_addr) },
	[TCA_FLOWER_KEY_TCP_SRC]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_TCP_DST]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_UDP_SRC]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_UDP_DST]	= { .type = NLA_U16 },
};

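/* Copy one netlink attribute into the key and its mask. If no mask attribute
 * type or no mask attribute was supplied, an all-ones mask is used, i.e. the
 * value is matched exactly.
 */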
static void fl_set_key_val(struct nlattr **tb,
			   void *val, int val_type,
			   void *mask, int mask_type, int len)
{
	if (!tb[val_type])
		return;
	memcpy(val, nla_data(tb[val_type]), len);
	if (mask_type == TCA_FLOWER_UNSPEC || !tb[mask_type])
		memset(mask, 0xff, len);
	else
		memcpy(mask, nla_data(tb[mask_type]), len);
}

static int fl_set_key(struct net *net, struct nlattr **tb,
		      struct fl_flow_key *key, struct fl_flow_key *mask)
{
#ifdef CONFIG_NET_CLS_IND
	if (tb[TCA_FLOWER_INDEV]) {
		int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV]);
		if (err < 0)
			return err;
		key->indev_ifindex = err;
		mask->indev_ifindex = 0xffffffff;
	}
#endif

	fl_set_key_val(tb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
		       mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
		       sizeof(key->eth.dst));
	fl_set_key_val(tb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
		       mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
		       sizeof(key->eth.src));

	fl_set_key_val(tb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE,
		       &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
		       sizeof(key->basic.n_proto));

	if (key->basic.n_proto == htons(ETH_P_IP) ||
	    key->basic.n_proto == htons(ETH_P_IPV6)) {
		fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
			       &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
			       sizeof(key->basic.ip_proto));
	}

	if (tb[TCA_FLOWER_KEY_IPV4_SRC] || tb[TCA_FLOWER_KEY_IPV4_DST]) {
		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
		fl_set_key_val(tb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
			       &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
			       sizeof(key->ipv4.src));
		fl_set_key_val(tb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
			       &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
			       sizeof(key->ipv4.dst));
	} else if (tb[TCA_FLOWER_KEY_IPV6_SRC] || tb[TCA_FLOWER_KEY_IPV6_DST]) {
		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
		fl_set_key_val(tb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
			       &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
			       sizeof(key->ipv6.src));
		fl_set_key_val(tb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
			       &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
			       sizeof(key->ipv6.dst));
	}

	if (key->basic.ip_proto == IPPROTO_TCP) {
		fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
			       &mask->tp.src, TCA_FLOWER_UNSPEC,
			       sizeof(key->tp.src));
		fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
			       &mask->tp.dst, TCA_FLOWER_UNSPEC,
			       sizeof(key->tp.dst));
	} else if (key->basic.ip_proto == IPPROTO_UDP) {
		fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
			       &mask->tp.src, TCA_FLOWER_UNSPEC,
			       sizeof(key->tp.src));
		fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
			       &mask->tp.dst, TCA_FLOWER_UNSPEC,
			       sizeof(key->tp.dst));
	}

	return 0;
}

static bool fl_mask_eq(struct fl_flow_mask *mask1,
		       struct fl_flow_mask *mask2)
{
	const long *lmask1 = fl_key_get_start(&mask1->key, mask1);
	const long *lmask2 = fl_key_get_start(&mask2->key, mask2);

	return !memcmp(&mask1->range, &mask2->range, sizeof(mask1->range)) &&
	       !memcmp(lmask1, lmask2, fl_mask_range(mask1));
}

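/* Template for the rhashtable parameters. key_len and the final key_offset
 * are filled in by fl_init_hashtable() once the mask range is known.
 */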
static const struct rhashtable_params fl_ht_params = {
	.key_offset = offsetof(struct cls_fl_filter, mkey), /* base offset */
	.head_offset = offsetof(struct cls_fl_filter, ht_node),
	.automatic_shrinking = true,
};

static int fl_init_hashtable(struct cls_fl_head *head,
			     struct fl_flow_mask *mask)
{
	head->ht_params = fl_ht_params;
	head->ht_params.key_len = fl_mask_range(mask);
	head->ht_params.key_offset += mask->range.start;

	return rhashtable_init(&head->ht, &head->ht_params);
}

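/* Helpers for building the flow dissector key list. The control and basic
 * keys are always registered; the optional members are registered only if
 * they overlap the byte range covered by the mask, so the dissector does not
 * extract fields the mask never looks at.
 */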
#define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member)
#define FL_KEY_MEMBER_SIZE(member) (sizeof(((struct fl_flow_key *) 0)->member))
#define FL_KEY_MEMBER_END_OFFSET(member)					\
	(FL_KEY_MEMBER_OFFSET(member) + FL_KEY_MEMBER_SIZE(member))

#define FL_KEY_IN_RANGE(mask, member)						\
        (FL_KEY_MEMBER_OFFSET(member) <= (mask)->range.end &&			\
         FL_KEY_MEMBER_END_OFFSET(member) >= (mask)->range.start)

#define FL_KEY_SET(keys, cnt, id, member)					\
	do {									\
		keys[cnt].key_id = id;						\
		keys[cnt].offset = FL_KEY_MEMBER_OFFSET(member);		\
		cnt++;								\
	} while(0);

#define FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, id, member)			\
	do {									\
		if (FL_KEY_IN_RANGE(mask, member))				\
			FL_KEY_SET(keys, cnt, id, member);			\
	} while(0);

static void fl_init_dissector(struct cls_fl_head *head,
			      struct fl_flow_mask *mask)
{
	struct flow_dissector_key keys[FLOW_DISSECTOR_KEY_MAX];
	size_t cnt = 0;

	FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_CONTROL, control);
	FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic);
	FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
			       FLOW_DISSECTOR_KEY_ETH_ADDRS, eth);
	FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
			       FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
	FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
			       FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
	FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
			       FLOW_DISSECTOR_KEY_PORTS, tp);

	skb_flow_dissector_init(&head->dissector, keys, cnt);
}

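/* Flower keeps exactly one mask per classifier instance. The first filter's
 * mask is adopted and used to size the hashtable and dissector; any later
 * filter whose mask differs is rejected with -EINVAL.
 */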
static int fl_check_assign_mask(struct cls_fl_head *head,
				struct fl_flow_mask *mask)
{
	int err;

	if (head->mask_assigned) {
		if (!fl_mask_eq(&head->mask, mask))
			return -EINVAL;
		else
			return 0;
	}

	/* Mask is not assigned yet, so assign it and init the hashtable
	 * and dissector according to it.
	 */
	err = fl_init_hashtable(head, mask);
	if (err)
		return err;
	memcpy(&head->mask, mask, sizeof(head->mask));
	head->mask_assigned = true;

	fl_init_dissector(head, mask);

	return 0;
}

static int fl_set_parms(struct net *net, struct tcf_proto *tp,
			struct cls_fl_filter *f, struct fl_flow_mask *mask,
			unsigned long base, struct nlattr **tb,
			struct nlattr *est, bool ovr)
{
	struct tcf_exts e;
	int err;

	tcf_exts_init(&e, TCA_FLOWER_ACT, 0);
	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
	if (err < 0)
		return err;

	if (tb[TCA_FLOWER_CLASSID]) {
		f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
		tcf_bind_filter(tp, &f->res, base);
	}

	err = fl_set_key(net, tb, &f->key, &mask->key);
	if (err)
		goto errout;

	fl_mask_update_range(mask);
	fl_set_masked_key(&f->mkey, &f->key, mask);

	tcf_exts_change(tp, &f->exts, &e);

	return 0;
errout:
	tcf_exts_destroy(&e);
	return err;
}

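/* Allocate an unused 32-bit handle. Handles are taken from a wrapping
 * counter; up to 0x80000000 candidates are tried before giving up, in which
 * case 0 is returned to signal failure.
 */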
static u32 fl_grab_new_handle(struct tcf_proto *tp,
			      struct cls_fl_head *head)
{
	unsigned int i = 0x80000000;
	u32 handle;

	do {
		if (++head->hgen == 0x7FFFFFFF)
			head->hgen = 1;
	} while (--i > 0 && fl_get(tp, head->hgen));

	if (unlikely(i == 0)) {
		pr_err("Insufficient number of handles\n");
		handle = 0;
	} else {
		handle = head->hgen;
	}

	return handle;
}

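/* Create or replace a filter: parse the netlink options, build the new
 * filter and its mask, make sure the mask matches the one already in use,
 * insert the filter into the hashtable, offload it to hardware and finally
 * publish it (replacing the old filter, if any) under RCU.
 */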
static int fl_change(struct net *net, struct sk_buff *in_skb,
		     struct tcf_proto *tp, unsigned long base,
		     u32 handle, struct nlattr **tca,
		     unsigned long *arg, bool ovr)
{
	struct cls_fl_head *head = rtnl_dereference(tp->root);
	struct cls_fl_filter *fold = (struct cls_fl_filter *) *arg;
	struct cls_fl_filter *fnew;
	struct nlattr *tb[TCA_FLOWER_MAX + 1];
	struct fl_flow_mask mask = {};
	u32 flags = 0;
	int err;

	if (!tca[TCA_OPTIONS])
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], fl_policy);
	if (err < 0)
		return err;

	if (fold && handle && fold->handle != handle)
		return -EINVAL;

	fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
	if (!fnew)
		return -ENOBUFS;

	tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);

	if (!handle) {
		handle = fl_grab_new_handle(tp, head);
		if (!handle) {
			err = -EINVAL;
			goto errout;
		}
	}
	fnew->handle = handle;

	if (tb[TCA_FLOWER_FLAGS])
		flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]);

	err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr);
	if (err)
		goto errout;

	err = fl_check_assign_mask(head, &mask);
	if (err)
		goto errout;

	err = rhashtable_insert_fast(&head->ht, &fnew->ht_node,
				     head->ht_params);
	if (err)
		goto errout;

	fl_hw_replace_filter(tp,
			     &head->dissector,
			     &mask.key,
			     &fnew->key,
			     &fnew->exts,
			     (unsigned long)fnew,
			     flags);

	if (fold) {
		rhashtable_remove_fast(&head->ht, &fold->ht_node,
				       head->ht_params);
		fl_hw_destroy_filter(tp, (unsigned long)fold);
	}

	*arg = (unsigned long) fnew;

	if (fold) {
		list_replace_rcu(&fold->list, &fnew->list);
		tcf_unbind_filter(tp, &fold->res);
		call_rcu(&fold->rcu, fl_destroy_filter);
	} else {
		list_add_tail_rcu(&fnew->list, &head->filters);
	}

	return 0;

errout:
	kfree(fnew);
	return err;
}

static int fl_delete(struct tcf_proto *tp, unsigned long arg)
{
	struct cls_fl_head *head = rtnl_dereference(tp->root);
	struct cls_fl_filter *f = (struct cls_fl_filter *) arg;

	rhashtable_remove_fast(&head->ht, &f->ht_node,
			       head->ht_params);
	list_del_rcu(&f->list);
	fl_hw_destroy_filter(tp, (unsigned long)f);
	tcf_unbind_filter(tp, &f->res);
	call_rcu(&f->rcu, fl_destroy_filter);
	return 0;
}

static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
	struct cls_fl_head *head = rtnl_dereference(tp->root);
	struct cls_fl_filter *f;

	list_for_each_entry_rcu(f, &head->filters, list) {
		if (arg->count < arg->skip)
			goto skip;
		if (arg->fn(tp, (unsigned long) f, arg) < 0) {
			arg->stop = 1;
			break;
		}
skip:
		arg->count++;
	}
}

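/* Dump one key/mask pair to netlink. Fields whose mask is all-zero are
 * skipped entirely; the mask attribute itself is only emitted for fields
 * that have a dedicated *_MASK attribute type.
 */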
static int fl_dump_key_val(struct sk_buff *skb,
			   void *val, int val_type,
			   void *mask, int mask_type, int len)
{
	int err;

	if (!memchr_inv(mask, 0, len))
		return 0;
	err = nla_put(skb, val_type, len, val);
	if (err)
		return err;
	if (mask_type != TCA_FLOWER_UNSPEC) {
		err = nla_put(skb, mask_type, len, mask);
		if (err)
			return err;
	}
	return 0;
}

static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
		   struct sk_buff *skb, struct tcmsg *t)
{
	struct cls_fl_head *head = rtnl_dereference(tp->root);
	struct cls_fl_filter *f = (struct cls_fl_filter *) fh;
	struct nlattr *nest;
	struct fl_flow_key *key, *mask;

	if (!f)
		return skb->len;

	t->tcm_handle = f->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (!nest)
		goto nla_put_failure;

	if (f->res.classid &&
	    nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid))
		goto nla_put_failure;

	key = &f->key;
	mask = &head->mask.key;

	if (mask->indev_ifindex) {
		struct net_device *dev;

		dev = __dev_get_by_index(net, key->indev_ifindex);
		if (dev && nla_put_string(skb, TCA_FLOWER_INDEV, dev->name))
			goto nla_put_failure;
	}

	fl_hw_update_stats(tp, f);

	if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
			    mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
			    sizeof(key->eth.dst)) ||
	    fl_dump_key_val(skb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
			    mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
			    sizeof(key->eth.src)) ||
	    fl_dump_key_val(skb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE,
			    &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
			    sizeof(key->basic.n_proto)))
		goto nla_put_failure;
	if ((key->basic.n_proto == htons(ETH_P_IP) ||
	     key->basic.n_proto == htons(ETH_P_IPV6)) &&
	    fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
			    &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
			    sizeof(key->basic.ip_proto)))
		goto nla_put_failure;

	if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
	    (fl_dump_key_val(skb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
			     &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
			     sizeof(key->ipv4.src)) ||
	     fl_dump_key_val(skb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
			     &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
			     sizeof(key->ipv4.dst))))
		goto nla_put_failure;
	else if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS &&
		 (fl_dump_key_val(skb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
				  &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
				  sizeof(key->ipv6.src)) ||
		  fl_dump_key_val(skb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
				  &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
				  sizeof(key->ipv6.dst))))
		goto nla_put_failure;

	if (key->basic.ip_proto == IPPROTO_TCP &&
	    (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
			     &mask->tp.src, TCA_FLOWER_UNSPEC,
			     sizeof(key->tp.src)) ||
	     fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
			     &mask->tp.dst, TCA_FLOWER_UNSPEC,
			     sizeof(key->tp.dst))))
		goto nla_put_failure;
	else if (key->basic.ip_proto == IPPROTO_UDP &&
		 (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
				  &mask->tp.src, TCA_FLOWER_UNSPEC,
				  sizeof(key->tp.src)) ||
		  fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
				  &mask->tp.dst, TCA_FLOWER_UNSPEC,
				  sizeof(key->tp.dst))))
		goto nla_put_failure;

	if (tcf_exts_dump(skb, &f->exts))
		goto nla_put_failure;

	nla_nest_end(skb, nest);

	if (tcf_exts_dump_stats(skb, &f->exts) < 0)
		goto nla_put_failure;

	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static struct tcf_proto_ops cls_fl_ops __read_mostly = {
	.kind		= "flower",
	.classify	= fl_classify,
	.init		= fl_init,
	.destroy	= fl_destroy,
	.get		= fl_get,
	.change		= fl_change,
	.delete		= fl_delete,
	.walk		= fl_walk,
	.dump		= fl_dump,
	.owner		= THIS_MODULE,
};

static int __init cls_fl_init(void)
{
	return register_tcf_proto_ops(&cls_fl_ops);
}

static void __exit cls_fl_exit(void)
{
	unregister_tcf_proto_ops(&cls_fl_ops);
}

module_init(cls_fl_init);
module_exit(cls_fl_exit);

MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>");
MODULE_DESCRIPTION("Flower classifier");
MODULE_LICENSE("GPL v2");