xref: /openbmc/linux/net/sched/cls_flower.c (revision a77e393c)
1 /*
2  * net/sched/cls_flower.c		Flower classifier
3  *
4  * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  */
11 
12 #include <linux/kernel.h>
13 #include <linux/init.h>
14 #include <linux/module.h>
15 #include <linux/rhashtable.h>
16 
17 #include <linux/if_ether.h>
18 #include <linux/in6.h>
19 #include <linux/ip.h>
20 
21 #include <net/sch_generic.h>
22 #include <net/pkt_cls.h>
23 #include <net/ip.h>
24 #include <net/flow_dissector.h>
25 
26 #include <net/dst.h>
27 #include <net/dst_metadata.h>
28 
/*
 * Flat match key used by the flower classifier.
 *
 * The same layout is used for the packet key built in fl_classify(), for the
 * per-filter key/masked key, and for the mask itself.  Keys are masked,
 * cleared and hashed as raw memory in units of long, so the layout and the
 * alignment below matter.
 */
struct fl_flow_key {
	int	indev_ifindex;	/* ingress ifindex; fl_classify() copies skb->skb_iif here */
	struct flow_dissector_key_control control;
	struct flow_dissector_key_control enc_control;	/* control data for the tunnel (enc_*) part */
	struct flow_dissector_key_basic basic;
	struct flow_dissector_key_eth_addrs eth;
	struct flow_dissector_key_vlan vlan;
	/* Only one address family is stored; control.addr_type selects it. */
	union {
		struct flow_dissector_key_ipv4_addrs ipv4;
		struct flow_dissector_key_ipv6_addrs ipv6;
	};
	struct flow_dissector_key_ports tp;
	struct flow_dissector_key_keyid enc_key_id;	/* tunnel id, filled from skb tunnel info */
	/* Tunnel addresses; enc_control.addr_type selects the family. */
	union {
		struct flow_dissector_key_ipv4_addrs enc_ipv4;
		struct flow_dissector_key_ipv6_addrs enc_ipv6;
	};
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
47 
/*
 * Byte window inside struct fl_flow_key that holds the mask bytes found by
 * fl_mask_update_range().  Both bounds are rounded to a multiple of
 * sizeof(long) there; the window is [start, end).
 */
struct fl_flow_mask_range {
	unsigned short int start;	/* byte offset of the first long in the window */
	unsigned short int end;		/* byte offset just past the last long in the window */
};
52 
/* A mask over struct fl_flow_key plus the byte window its set bytes occupy. */
struct fl_flow_mask {
	struct fl_flow_key key;			/* mask bits, same layout as a key */
	struct fl_flow_mask_range range;	/* maintained by fl_mask_update_range() */
	struct rcu_head	rcu;
};
58 
/*
 * Per-tcf_proto state (stored in tp->root).  One mask is shared by every
 * filter of the instance; see fl_check_assign_mask().
 */
struct cls_fl_head {
	struct rhashtable ht;		/* filters, hashed on the masked key window */
	struct fl_flow_mask mask;	/* the single mask, valid once mask_assigned is set */
	struct flow_dissector dissector; /* set up from the mask by fl_init_dissector() */
	u32 hgen;			/* last handle tried by fl_grab_new_handle() */
	bool mask_assigned;		/* true once mask/ht/dissector are initialised */
	struct list_head filters;	/* all filters, including skip_sw ones */
	struct rhashtable_params ht_params; /* fl_ht_params adjusted to the mask window */
	struct rcu_head rcu;		/* used by kfree_rcu() in fl_destroy() */
};
69 
/* One flower filter. */
struct cls_fl_filter {
	struct rhash_head ht_node;	/* linkage in cls_fl_head.ht */
	struct fl_flow_key mkey;	/* key & mask; this is what the hash table matches on */
	struct tcf_exts exts;		/* actions executed on a match */
	struct tcf_result res;		/* classification result copied out on a match */
	struct fl_flow_key key;		/* unmasked key as configured by the user */
	struct list_head list;		/* linkage in cls_fl_head.filters */
	u32 handle;
	u32 flags;			/* TCA_FLOWER_FLAGS value (checked with tc_skip_sw()) */
	struct rcu_head	rcu;		/* deferred free via fl_destroy_filter() */
};
81 
82 static unsigned short int fl_mask_range(const struct fl_flow_mask *mask)
83 {
84 	return mask->range.end - mask->range.start;
85 }
86 
87 static void fl_mask_update_range(struct fl_flow_mask *mask)
88 {
89 	const u8 *bytes = (const u8 *) &mask->key;
90 	size_t size = sizeof(mask->key);
91 	size_t i, first = 0, last = size - 1;
92 
93 	for (i = 0; i < sizeof(mask->key); i++) {
94 		if (bytes[i]) {
95 			if (!first && i)
96 				first = i;
97 			last = i;
98 		}
99 	}
100 	mask->range.start = rounddown(first, sizeof(long));
101 	mask->range.end = roundup(last + 1, sizeof(long));
102 }
103 
104 static void *fl_key_get_start(struct fl_flow_key *key,
105 			      const struct fl_flow_mask *mask)
106 {
107 	return (u8 *) key + mask->range.start;
108 }
109 
110 static void fl_set_masked_key(struct fl_flow_key *mkey, struct fl_flow_key *key,
111 			      struct fl_flow_mask *mask)
112 {
113 	const long *lkey = fl_key_get_start(key, mask);
114 	const long *lmask = fl_key_get_start(&mask->key, mask);
115 	long *lmkey = fl_key_get_start(mkey, mask);
116 	int i;
117 
118 	for (i = 0; i < fl_mask_range(mask); i += sizeof(long))
119 		*lmkey++ = *lkey++ & *lmask++;
120 }
121 
/* Zero the part of @key that lies inside @mask's window. */
static void fl_clear_masked_range(struct fl_flow_key *key,
				  struct fl_flow_mask *mask)
{
	void *window = fl_key_get_start(key, mask);
	unsigned short int len = fl_mask_range(mask);

	memset(window, 0, len);
}
127 
128 static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
129 		       struct tcf_result *res)
130 {
131 	struct cls_fl_head *head = rcu_dereference_bh(tp->root);
132 	struct cls_fl_filter *f;
133 	struct fl_flow_key skb_key;
134 	struct fl_flow_key skb_mkey;
135 	struct ip_tunnel_info *info;
136 
137 	if (!atomic_read(&head->ht.nelems))
138 		return -1;
139 
140 	fl_clear_masked_range(&skb_key, &head->mask);
141 
142 	info = skb_tunnel_info(skb);
143 	if (info) {
144 		struct ip_tunnel_key *key = &info->key;
145 
146 		switch (ip_tunnel_info_af(info)) {
147 		case AF_INET:
148 			skb_key.enc_ipv4.src = key->u.ipv4.src;
149 			skb_key.enc_ipv4.dst = key->u.ipv4.dst;
150 			break;
151 		case AF_INET6:
152 			skb_key.enc_ipv6.src = key->u.ipv6.src;
153 			skb_key.enc_ipv6.dst = key->u.ipv6.dst;
154 			break;
155 		}
156 
157 		skb_key.enc_key_id.keyid = tunnel_id_to_key32(key->tun_id);
158 	}
159 
160 	skb_key.indev_ifindex = skb->skb_iif;
161 	/* skb_flow_dissect() does not set n_proto in case an unknown protocol,
162 	 * so do it rather here.
163 	 */
164 	skb_key.basic.n_proto = skb->protocol;
165 	skb_flow_dissect(skb, &head->dissector, &skb_key, 0);
166 
167 	fl_set_masked_key(&skb_mkey, &skb_key, &head->mask);
168 
169 	f = rhashtable_lookup_fast(&head->ht,
170 				   fl_key_get_start(&skb_mkey, &head->mask),
171 				   head->ht_params);
172 	if (f && !tc_skip_sw(f->flags)) {
173 		*res = f->res;
174 		return tcf_exts_exec(skb, &f->exts, res);
175 	}
176 	return -1;
177 }
178 
179 static int fl_init(struct tcf_proto *tp)
180 {
181 	struct cls_fl_head *head;
182 
183 	head = kzalloc(sizeof(*head), GFP_KERNEL);
184 	if (!head)
185 		return -ENOBUFS;
186 
187 	INIT_LIST_HEAD_RCU(&head->filters);
188 	rcu_assign_pointer(tp->root, head);
189 
190 	return 0;
191 }
192 
193 static void fl_destroy_filter(struct rcu_head *head)
194 {
195 	struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu);
196 
197 	tcf_exts_destroy(&f->exts);
198 	kfree(f);
199 }
200 
201 static void fl_hw_destroy_filter(struct tcf_proto *tp, unsigned long cookie)
202 {
203 	struct net_device *dev = tp->q->dev_queue->dev;
204 	struct tc_cls_flower_offload offload = {0};
205 	struct tc_to_netdev tc;
206 
207 	if (!tc_should_offload(dev, tp, 0))
208 		return;
209 
210 	offload.command = TC_CLSFLOWER_DESTROY;
211 	offload.cookie = cookie;
212 
213 	tc.type = TC_SETUP_CLSFLOWER;
214 	tc.cls_flower = &offload;
215 
216 	dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
217 }
218 
219 static int fl_hw_replace_filter(struct tcf_proto *tp,
220 				struct flow_dissector *dissector,
221 				struct fl_flow_key *mask,
222 				struct fl_flow_key *key,
223 				struct tcf_exts *actions,
224 				unsigned long cookie, u32 flags)
225 {
226 	struct net_device *dev = tp->q->dev_queue->dev;
227 	struct tc_cls_flower_offload offload = {0};
228 	struct tc_to_netdev tc;
229 	int err;
230 
231 	if (!tc_should_offload(dev, tp, flags))
232 		return tc_skip_sw(flags) ? -EINVAL : 0;
233 
234 	offload.command = TC_CLSFLOWER_REPLACE;
235 	offload.cookie = cookie;
236 	offload.dissector = dissector;
237 	offload.mask = mask;
238 	offload.key = key;
239 	offload.exts = actions;
240 
241 	tc.type = TC_SETUP_CLSFLOWER;
242 	tc.cls_flower = &offload;
243 
244 	err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol,
245 					    &tc);
246 
247 	if (tc_skip_sw(flags))
248 		return err;
249 
250 	return 0;
251 }
252 
253 static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
254 {
255 	struct net_device *dev = tp->q->dev_queue->dev;
256 	struct tc_cls_flower_offload offload = {0};
257 	struct tc_to_netdev tc;
258 
259 	if (!tc_should_offload(dev, tp, 0))
260 		return;
261 
262 	offload.command = TC_CLSFLOWER_STATS;
263 	offload.cookie = (unsigned long)f;
264 	offload.exts = &f->exts;
265 
266 	tc.type = TC_SETUP_CLSFLOWER;
267 	tc.cls_flower = &offload;
268 
269 	dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
270 }
271 
272 static bool fl_destroy(struct tcf_proto *tp, bool force)
273 {
274 	struct cls_fl_head *head = rtnl_dereference(tp->root);
275 	struct cls_fl_filter *f, *next;
276 
277 	if (!force && !list_empty(&head->filters))
278 		return false;
279 
280 	list_for_each_entry_safe(f, next, &head->filters, list) {
281 		fl_hw_destroy_filter(tp, (unsigned long)f);
282 		list_del_rcu(&f->list);
283 		call_rcu(&f->rcu, fl_destroy_filter);
284 	}
285 	RCU_INIT_POINTER(tp->root, NULL);
286 	if (head->mask_assigned)
287 		rhashtable_destroy(&head->ht);
288 	kfree_rcu(head, rcu);
289 	return true;
290 }
291 
292 static unsigned long fl_get(struct tcf_proto *tp, u32 handle)
293 {
294 	struct cls_fl_head *head = rtnl_dereference(tp->root);
295 	struct cls_fl_filter *f;
296 
297 	list_for_each_entry(f, &head->filters, list)
298 		if (f->handle == handle)
299 			return (unsigned long) f;
300 	return 0;
301 }
302 
303 static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
304 	[TCA_FLOWER_UNSPEC]		= { .type = NLA_UNSPEC },
305 	[TCA_FLOWER_CLASSID]		= { .type = NLA_U32 },
306 	[TCA_FLOWER_INDEV]		= { .type = NLA_STRING,
307 					    .len = IFNAMSIZ },
308 	[TCA_FLOWER_KEY_ETH_DST]	= { .len = ETH_ALEN },
309 	[TCA_FLOWER_KEY_ETH_DST_MASK]	= { .len = ETH_ALEN },
310 	[TCA_FLOWER_KEY_ETH_SRC]	= { .len = ETH_ALEN },
311 	[TCA_FLOWER_KEY_ETH_SRC_MASK]	= { .len = ETH_ALEN },
312 	[TCA_FLOWER_KEY_ETH_TYPE]	= { .type = NLA_U16 },
313 	[TCA_FLOWER_KEY_IP_PROTO]	= { .type = NLA_U8 },
314 	[TCA_FLOWER_KEY_IPV4_SRC]	= { .type = NLA_U32 },
315 	[TCA_FLOWER_KEY_IPV4_SRC_MASK]	= { .type = NLA_U32 },
316 	[TCA_FLOWER_KEY_IPV4_DST]	= { .type = NLA_U32 },
317 	[TCA_FLOWER_KEY_IPV4_DST_MASK]	= { .type = NLA_U32 },
318 	[TCA_FLOWER_KEY_IPV6_SRC]	= { .len = sizeof(struct in6_addr) },
319 	[TCA_FLOWER_KEY_IPV6_SRC_MASK]	= { .len = sizeof(struct in6_addr) },
320 	[TCA_FLOWER_KEY_IPV6_DST]	= { .len = sizeof(struct in6_addr) },
321 	[TCA_FLOWER_KEY_IPV6_DST_MASK]	= { .len = sizeof(struct in6_addr) },
322 	[TCA_FLOWER_KEY_TCP_SRC]	= { .type = NLA_U16 },
323 	[TCA_FLOWER_KEY_TCP_DST]	= { .type = NLA_U16 },
324 	[TCA_FLOWER_KEY_UDP_SRC]	= { .type = NLA_U16 },
325 	[TCA_FLOWER_KEY_UDP_DST]	= { .type = NLA_U16 },
326 	[TCA_FLOWER_KEY_VLAN_ID]	= { .type = NLA_U16 },
327 	[TCA_FLOWER_KEY_VLAN_PRIO]	= { .type = NLA_U8 },
328 	[TCA_FLOWER_KEY_VLAN_ETH_TYPE]	= { .type = NLA_U16 },
329 	[TCA_FLOWER_KEY_ENC_KEY_ID]	= { .type = NLA_U32 },
330 	[TCA_FLOWER_KEY_ENC_IPV4_SRC]	= { .type = NLA_U32 },
331 	[TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK] = { .type = NLA_U32 },
332 	[TCA_FLOWER_KEY_ENC_IPV4_DST]	= { .type = NLA_U32 },
333 	[TCA_FLOWER_KEY_ENC_IPV4_DST_MASK] = { .type = NLA_U32 },
334 	[TCA_FLOWER_KEY_ENC_IPV6_SRC]	= { .len = sizeof(struct in6_addr) },
335 	[TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK] = { .len = sizeof(struct in6_addr) },
336 	[TCA_FLOWER_KEY_ENC_IPV6_DST]	= { .len = sizeof(struct in6_addr) },
337 	[TCA_FLOWER_KEY_ENC_IPV6_DST_MASK] = { .len = sizeof(struct in6_addr) },
338 	[TCA_FLOWER_KEY_TCP_SRC_MASK]	= { .type = NLA_U16 },
339 	[TCA_FLOWER_KEY_TCP_DST_MASK]	= { .type = NLA_U16 },
340 	[TCA_FLOWER_KEY_UDP_SRC_MASK]	= { .type = NLA_U16 },
341 	[TCA_FLOWER_KEY_UDP_DST_MASK]	= { .type = NLA_U16 },
342 };
343 
344 static void fl_set_key_val(struct nlattr **tb,
345 			   void *val, int val_type,
346 			   void *mask, int mask_type, int len)
347 {
348 	if (!tb[val_type])
349 		return;
350 	memcpy(val, nla_data(tb[val_type]), len);
351 	if (mask_type == TCA_FLOWER_UNSPEC || !tb[mask_type])
352 		memset(mask, 0xff, len);
353 	else
354 		memcpy(mask, nla_data(tb[mask_type]), len);
355 }
356 
357 static void fl_set_key_vlan(struct nlattr **tb,
358 			    struct flow_dissector_key_vlan *key_val,
359 			    struct flow_dissector_key_vlan *key_mask)
360 {
361 #define VLAN_PRIORITY_MASK	0x7
362 
363 	if (tb[TCA_FLOWER_KEY_VLAN_ID]) {
364 		key_val->vlan_id =
365 			nla_get_u16(tb[TCA_FLOWER_KEY_VLAN_ID]) & VLAN_VID_MASK;
366 		key_mask->vlan_id = VLAN_VID_MASK;
367 	}
368 	if (tb[TCA_FLOWER_KEY_VLAN_PRIO]) {
369 		key_val->vlan_priority =
370 			nla_get_u8(tb[TCA_FLOWER_KEY_VLAN_PRIO]) &
371 			VLAN_PRIORITY_MASK;
372 		key_mask->vlan_priority = VLAN_PRIORITY_MASK;
373 	}
374 }
375 
376 static int fl_set_key(struct net *net, struct nlattr **tb,
377 		      struct fl_flow_key *key, struct fl_flow_key *mask)
378 {
379 	__be16 ethertype;
380 #ifdef CONFIG_NET_CLS_IND
381 	if (tb[TCA_FLOWER_INDEV]) {
382 		int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV]);
383 		if (err < 0)
384 			return err;
385 		key->indev_ifindex = err;
386 		mask->indev_ifindex = 0xffffffff;
387 	}
388 #endif
389 
390 	fl_set_key_val(tb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
391 		       mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
392 		       sizeof(key->eth.dst));
393 	fl_set_key_val(tb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
394 		       mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
395 		       sizeof(key->eth.src));
396 
397 	if (tb[TCA_FLOWER_KEY_ETH_TYPE]) {
398 		ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_ETH_TYPE]);
399 
400 		if (ethertype == htons(ETH_P_8021Q)) {
401 			fl_set_key_vlan(tb, &key->vlan, &mask->vlan);
402 			fl_set_key_val(tb, &key->basic.n_proto,
403 				       TCA_FLOWER_KEY_VLAN_ETH_TYPE,
404 				       &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
405 				       sizeof(key->basic.n_proto));
406 		} else {
407 			key->basic.n_proto = ethertype;
408 			mask->basic.n_proto = cpu_to_be16(~0);
409 		}
410 	}
411 
412 	if (key->basic.n_proto == htons(ETH_P_IP) ||
413 	    key->basic.n_proto == htons(ETH_P_IPV6)) {
414 		fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
415 			       &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
416 			       sizeof(key->basic.ip_proto));
417 	}
418 
419 	if (tb[TCA_FLOWER_KEY_IPV4_SRC] || tb[TCA_FLOWER_KEY_IPV4_DST]) {
420 		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
421 		fl_set_key_val(tb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
422 			       &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
423 			       sizeof(key->ipv4.src));
424 		fl_set_key_val(tb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
425 			       &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
426 			       sizeof(key->ipv4.dst));
427 	} else if (tb[TCA_FLOWER_KEY_IPV6_SRC] || tb[TCA_FLOWER_KEY_IPV6_DST]) {
428 		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
429 		fl_set_key_val(tb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
430 			       &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
431 			       sizeof(key->ipv6.src));
432 		fl_set_key_val(tb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
433 			       &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
434 			       sizeof(key->ipv6.dst));
435 	}
436 
437 	if (key->basic.ip_proto == IPPROTO_TCP) {
438 		fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
439 			       &mask->tp.src, TCA_FLOWER_KEY_TCP_SRC_MASK,
440 			       sizeof(key->tp.src));
441 		fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
442 			       &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK,
443 			       sizeof(key->tp.dst));
444 	} else if (key->basic.ip_proto == IPPROTO_UDP) {
445 		fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
446 			       &mask->tp.src, TCA_FLOWER_KEY_UDP_SRC_MASK,
447 			       sizeof(key->tp.src));
448 		fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
449 			       &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK,
450 			       sizeof(key->tp.dst));
451 	}
452 
453 	if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] ||
454 	    tb[TCA_FLOWER_KEY_ENC_IPV4_DST]) {
455 		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
456 		fl_set_key_val(tb, &key->enc_ipv4.src,
457 			       TCA_FLOWER_KEY_ENC_IPV4_SRC,
458 			       &mask->enc_ipv4.src,
459 			       TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,
460 			       sizeof(key->enc_ipv4.src));
461 		fl_set_key_val(tb, &key->enc_ipv4.dst,
462 			       TCA_FLOWER_KEY_ENC_IPV4_DST,
463 			       &mask->enc_ipv4.dst,
464 			       TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,
465 			       sizeof(key->enc_ipv4.dst));
466 	}
467 
468 	if (tb[TCA_FLOWER_KEY_ENC_IPV6_SRC] ||
469 	    tb[TCA_FLOWER_KEY_ENC_IPV6_DST]) {
470 		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
471 		fl_set_key_val(tb, &key->enc_ipv6.src,
472 			       TCA_FLOWER_KEY_ENC_IPV6_SRC,
473 			       &mask->enc_ipv6.src,
474 			       TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,
475 			       sizeof(key->enc_ipv6.src));
476 		fl_set_key_val(tb, &key->enc_ipv6.dst,
477 			       TCA_FLOWER_KEY_ENC_IPV6_DST,
478 			       &mask->enc_ipv6.dst,
479 			       TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,
480 			       sizeof(key->enc_ipv6.dst));
481 	}
482 
483 	fl_set_key_val(tb, &key->enc_key_id.keyid, TCA_FLOWER_KEY_ENC_KEY_ID,
484 		       &mask->enc_key_id.keyid, TCA_FLOWER_UNSPEC,
485 		       sizeof(key->enc_key_id.keyid));
486 
487 	return 0;
488 }
489 
490 static bool fl_mask_eq(struct fl_flow_mask *mask1,
491 		       struct fl_flow_mask *mask2)
492 {
493 	const long *lmask1 = fl_key_get_start(&mask1->key, mask1);
494 	const long *lmask2 = fl_key_get_start(&mask2->key, mask2);
495 
496 	return !memcmp(&mask1->range, &mask2->range, sizeof(mask1->range)) &&
497 	       !memcmp(lmask1, lmask2, fl_mask_range(mask1));
498 }
499 
/*
 * Template hash table parameters.  fl_init_hashtable() copies this into the
 * head and then sets key_len and shifts key_offset according to the mask
 * window, so only the masked part of cls_fl_filter.mkey is hashed.
 */
static const struct rhashtable_params fl_ht_params = {
	.key_offset = offsetof(struct cls_fl_filter, mkey), /* base offset */
	.head_offset = offsetof(struct cls_fl_filter, ht_node),
	.automatic_shrinking = true,
};
505 
506 static int fl_init_hashtable(struct cls_fl_head *head,
507 			     struct fl_flow_mask *mask)
508 {
509 	head->ht_params = fl_ht_params;
510 	head->ht_params.key_len = fl_mask_range(mask);
511 	head->ht_params.key_offset += mask->range.start;
512 
513 	return rhashtable_init(&head->ht, &head->ht_params);
514 }
515 
/* Byte offset / size of a member of struct fl_flow_key. */
#define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member)
#define FL_KEY_MEMBER_SIZE(member) (sizeof(((struct fl_flow_key *) 0)->member))

/*
 * Non-NULL when any byte of @member inside the key-shaped object at @mask is
 * non-zero, i.e. the member takes part in matching.
 */
#define FL_KEY_IS_MASKED(mask, member)						\
	(memchr_inv(((char *)(mask)) + FL_KEY_MEMBER_OFFSET(member),		\
		    0, FL_KEY_MEMBER_SIZE(member)))

/*
 * Append a dissector key entry for @member to @keys and advance @cnt.
 * No trailing semicolon after while (0), so the macro behaves like a single
 * statement and is safe in if/else bodies.
 */
#define FL_KEY_SET(keys, cnt, id, member)					\
	do {									\
		(keys)[(cnt)].key_id = (id);					\
		(keys)[(cnt)].offset = FL_KEY_MEMBER_OFFSET(member);		\
		(cnt)++;							\
	} while (0)

/* Same as FL_KEY_SET(), but only when @member is masked in @mask. */
#define FL_KEY_SET_IF_MASKED(mask, keys, cnt, id, member)			\
	do {									\
		if (FL_KEY_IS_MASKED(mask, member))				\
			FL_KEY_SET(keys, cnt, id, member);			\
	} while (0)
535 
536 static void fl_init_dissector(struct cls_fl_head *head,
537 			      struct fl_flow_mask *mask)
538 {
539 	struct flow_dissector_key keys[FLOW_DISSECTOR_KEY_MAX];
540 	size_t cnt = 0;
541 
542 	FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_CONTROL, control);
543 	FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic);
544 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
545 			     FLOW_DISSECTOR_KEY_ETH_ADDRS, eth);
546 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
547 			     FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
548 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
549 			     FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
550 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
551 			     FLOW_DISSECTOR_KEY_PORTS, tp);
552 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
553 			     FLOW_DISSECTOR_KEY_VLAN, vlan);
554 
555 	skb_flow_dissector_init(&head->dissector, keys, cnt);
556 }
557 
558 static int fl_check_assign_mask(struct cls_fl_head *head,
559 				struct fl_flow_mask *mask)
560 {
561 	int err;
562 
563 	if (head->mask_assigned) {
564 		if (!fl_mask_eq(&head->mask, mask))
565 			return -EINVAL;
566 		else
567 			return 0;
568 	}
569 
570 	/* Mask is not assigned yet. So assign it and init hashtable
571 	 * according to that.
572 	 */
573 	err = fl_init_hashtable(head, mask);
574 	if (err)
575 		return err;
576 	memcpy(&head->mask, mask, sizeof(head->mask));
577 	head->mask_assigned = true;
578 
579 	fl_init_dissector(head, mask);
580 
581 	return 0;
582 }
583 
584 static int fl_set_parms(struct net *net, struct tcf_proto *tp,
585 			struct cls_fl_filter *f, struct fl_flow_mask *mask,
586 			unsigned long base, struct nlattr **tb,
587 			struct nlattr *est, bool ovr)
588 {
589 	struct tcf_exts e;
590 	int err;
591 
592 	err = tcf_exts_init(&e, TCA_FLOWER_ACT, 0);
593 	if (err < 0)
594 		return err;
595 	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
596 	if (err < 0)
597 		goto errout;
598 
599 	if (tb[TCA_FLOWER_CLASSID]) {
600 		f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
601 		tcf_bind_filter(tp, &f->res, base);
602 	}
603 
604 	err = fl_set_key(net, tb, &f->key, &mask->key);
605 	if (err)
606 		goto errout;
607 
608 	fl_mask_update_range(mask);
609 	fl_set_masked_key(&f->mkey, &f->key, mask);
610 
611 	tcf_exts_change(tp, &f->exts, &e);
612 
613 	return 0;
614 errout:
615 	tcf_exts_destroy(&e);
616 	return err;
617 }
618 
619 static u32 fl_grab_new_handle(struct tcf_proto *tp,
620 			      struct cls_fl_head *head)
621 {
622 	unsigned int i = 0x80000000;
623 	u32 handle;
624 
625 	do {
626 		if (++head->hgen == 0x7FFFFFFF)
627 			head->hgen = 1;
628 	} while (--i > 0 && fl_get(tp, head->hgen));
629 
630 	if (unlikely(i == 0)) {
631 		pr_err("Insufficient number of handles\n");
632 		handle = 0;
633 	} else {
634 		handle = head->hgen;
635 	}
636 
637 	return handle;
638 }
639 
640 static int fl_change(struct net *net, struct sk_buff *in_skb,
641 		     struct tcf_proto *tp, unsigned long base,
642 		     u32 handle, struct nlattr **tca,
643 		     unsigned long *arg, bool ovr)
644 {
645 	struct cls_fl_head *head = rtnl_dereference(tp->root);
646 	struct cls_fl_filter *fold = (struct cls_fl_filter *) *arg;
647 	struct cls_fl_filter *fnew;
648 	struct nlattr *tb[TCA_FLOWER_MAX + 1];
649 	struct fl_flow_mask mask = {};
650 	int err;
651 
652 	if (!tca[TCA_OPTIONS])
653 		return -EINVAL;
654 
655 	err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], fl_policy);
656 	if (err < 0)
657 		return err;
658 
659 	if (fold && handle && fold->handle != handle)
660 		return -EINVAL;
661 
662 	fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
663 	if (!fnew)
664 		return -ENOBUFS;
665 
666 	err = tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);
667 	if (err < 0)
668 		goto errout;
669 
670 	if (!handle) {
671 		handle = fl_grab_new_handle(tp, head);
672 		if (!handle) {
673 			err = -EINVAL;
674 			goto errout;
675 		}
676 	}
677 	fnew->handle = handle;
678 
679 	if (tb[TCA_FLOWER_FLAGS]) {
680 		fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]);
681 
682 		if (!tc_flags_valid(fnew->flags)) {
683 			err = -EINVAL;
684 			goto errout;
685 		}
686 	}
687 
688 	err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr);
689 	if (err)
690 		goto errout;
691 
692 	err = fl_check_assign_mask(head, &mask);
693 	if (err)
694 		goto errout;
695 
696 	if (!tc_skip_sw(fnew->flags)) {
697 		err = rhashtable_insert_fast(&head->ht, &fnew->ht_node,
698 					     head->ht_params);
699 		if (err)
700 			goto errout;
701 	}
702 
703 	err = fl_hw_replace_filter(tp,
704 				   &head->dissector,
705 				   &mask.key,
706 				   &fnew->key,
707 				   &fnew->exts,
708 				   (unsigned long)fnew,
709 				   fnew->flags);
710 	if (err)
711 		goto errout;
712 
713 	if (fold) {
714 		rhashtable_remove_fast(&head->ht, &fold->ht_node,
715 				       head->ht_params);
716 		fl_hw_destroy_filter(tp, (unsigned long)fold);
717 	}
718 
719 	*arg = (unsigned long) fnew;
720 
721 	if (fold) {
722 		list_replace_rcu(&fold->list, &fnew->list);
723 		tcf_unbind_filter(tp, &fold->res);
724 		call_rcu(&fold->rcu, fl_destroy_filter);
725 	} else {
726 		list_add_tail_rcu(&fnew->list, &head->filters);
727 	}
728 
729 	return 0;
730 
731 errout:
732 	tcf_exts_destroy(&fnew->exts);
733 	kfree(fnew);
734 	return err;
735 }
736 
737 static int fl_delete(struct tcf_proto *tp, unsigned long arg)
738 {
739 	struct cls_fl_head *head = rtnl_dereference(tp->root);
740 	struct cls_fl_filter *f = (struct cls_fl_filter *) arg;
741 
742 	rhashtable_remove_fast(&head->ht, &f->ht_node,
743 			       head->ht_params);
744 	list_del_rcu(&f->list);
745 	fl_hw_destroy_filter(tp, (unsigned long)f);
746 	tcf_unbind_filter(tp, &f->res);
747 	call_rcu(&f->rcu, fl_destroy_filter);
748 	return 0;
749 }
750 
751 static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
752 {
753 	struct cls_fl_head *head = rtnl_dereference(tp->root);
754 	struct cls_fl_filter *f;
755 
756 	list_for_each_entry_rcu(f, &head->filters, list) {
757 		if (arg->count < arg->skip)
758 			goto skip;
759 		if (arg->fn(tp, (unsigned long) f, arg) < 0) {
760 			arg->stop = 1;
761 			break;
762 		}
763 skip:
764 		arg->count++;
765 	}
766 }
767 
768 static int fl_dump_key_val(struct sk_buff *skb,
769 			   void *val, int val_type,
770 			   void *mask, int mask_type, int len)
771 {
772 	int err;
773 
774 	if (!memchr_inv(mask, 0, len))
775 		return 0;
776 	err = nla_put(skb, val_type, len, val);
777 	if (err)
778 		return err;
779 	if (mask_type != TCA_FLOWER_UNSPEC) {
780 		err = nla_put(skb, mask_type, len, mask);
781 		if (err)
782 			return err;
783 	}
784 	return 0;
785 }
786 
787 static int fl_dump_key_vlan(struct sk_buff *skb,
788 			    struct flow_dissector_key_vlan *vlan_key,
789 			    struct flow_dissector_key_vlan *vlan_mask)
790 {
791 	int err;
792 
793 	if (!memchr_inv(vlan_mask, 0, sizeof(*vlan_mask)))
794 		return 0;
795 	if (vlan_mask->vlan_id) {
796 		err = nla_put_u16(skb, TCA_FLOWER_KEY_VLAN_ID,
797 				  vlan_key->vlan_id);
798 		if (err)
799 			return err;
800 	}
801 	if (vlan_mask->vlan_priority) {
802 		err = nla_put_u8(skb, TCA_FLOWER_KEY_VLAN_PRIO,
803 				 vlan_key->vlan_priority);
804 		if (err)
805 			return err;
806 	}
807 	return 0;
808 }
809 
810 static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
811 		   struct sk_buff *skb, struct tcmsg *t)
812 {
813 	struct cls_fl_head *head = rtnl_dereference(tp->root);
814 	struct cls_fl_filter *f = (struct cls_fl_filter *) fh;
815 	struct nlattr *nest;
816 	struct fl_flow_key *key, *mask;
817 
818 	if (!f)
819 		return skb->len;
820 
821 	t->tcm_handle = f->handle;
822 
823 	nest = nla_nest_start(skb, TCA_OPTIONS);
824 	if (!nest)
825 		goto nla_put_failure;
826 
827 	if (f->res.classid &&
828 	    nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid))
829 		goto nla_put_failure;
830 
831 	key = &f->key;
832 	mask = &head->mask.key;
833 
834 	if (mask->indev_ifindex) {
835 		struct net_device *dev;
836 
837 		dev = __dev_get_by_index(net, key->indev_ifindex);
838 		if (dev && nla_put_string(skb, TCA_FLOWER_INDEV, dev->name))
839 			goto nla_put_failure;
840 	}
841 
842 	fl_hw_update_stats(tp, f);
843 
844 	if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
845 			    mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
846 			    sizeof(key->eth.dst)) ||
847 	    fl_dump_key_val(skb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
848 			    mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
849 			    sizeof(key->eth.src)) ||
850 	    fl_dump_key_val(skb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE,
851 			    &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
852 			    sizeof(key->basic.n_proto)))
853 		goto nla_put_failure;
854 
855 	if (fl_dump_key_vlan(skb, &key->vlan, &mask->vlan))
856 		goto nla_put_failure;
857 
858 	if ((key->basic.n_proto == htons(ETH_P_IP) ||
859 	     key->basic.n_proto == htons(ETH_P_IPV6)) &&
860 	    fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
861 			    &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
862 			    sizeof(key->basic.ip_proto)))
863 		goto nla_put_failure;
864 
865 	if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
866 	    (fl_dump_key_val(skb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
867 			     &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
868 			     sizeof(key->ipv4.src)) ||
869 	     fl_dump_key_val(skb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
870 			     &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
871 			     sizeof(key->ipv4.dst))))
872 		goto nla_put_failure;
873 	else if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS &&
874 		 (fl_dump_key_val(skb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
875 				  &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
876 				  sizeof(key->ipv6.src)) ||
877 		  fl_dump_key_val(skb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
878 				  &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
879 				  sizeof(key->ipv6.dst))))
880 		goto nla_put_failure;
881 
882 	if (key->basic.ip_proto == IPPROTO_TCP &&
883 	    (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
884 			     &mask->tp.src, TCA_FLOWER_KEY_TCP_SRC_MASK,
885 			     sizeof(key->tp.src)) ||
886 	     fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
887 			     &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK,
888 			     sizeof(key->tp.dst))))
889 		goto nla_put_failure;
890 	else if (key->basic.ip_proto == IPPROTO_UDP &&
891 		 (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
892 				  &mask->tp.src, TCA_FLOWER_KEY_UDP_SRC_MASK,
893 				  sizeof(key->tp.src)) ||
894 		  fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
895 				  &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK,
896 				  sizeof(key->tp.dst))))
897 		goto nla_put_failure;
898 
899 	if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
900 	    (fl_dump_key_val(skb, &key->enc_ipv4.src,
901 			    TCA_FLOWER_KEY_ENC_IPV4_SRC, &mask->enc_ipv4.src,
902 			    TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,
903 			    sizeof(key->enc_ipv4.src)) ||
904 	     fl_dump_key_val(skb, &key->enc_ipv4.dst,
905 			     TCA_FLOWER_KEY_ENC_IPV4_DST, &mask->enc_ipv4.dst,
906 			     TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,
907 			     sizeof(key->enc_ipv4.dst))))
908 		goto nla_put_failure;
909 	else if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS &&
910 		 (fl_dump_key_val(skb, &key->enc_ipv6.src,
911 			    TCA_FLOWER_KEY_ENC_IPV6_SRC, &mask->enc_ipv6.src,
912 			    TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,
913 			    sizeof(key->enc_ipv6.src)) ||
914 		 fl_dump_key_val(skb, &key->enc_ipv6.dst,
915 				 TCA_FLOWER_KEY_ENC_IPV6_DST,
916 				 &mask->enc_ipv6.dst,
917 				 TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,
918 			    sizeof(key->enc_ipv6.dst))))
919 		goto nla_put_failure;
920 
921 	if (fl_dump_key_val(skb, &key->enc_key_id, TCA_FLOWER_KEY_ENC_KEY_ID,
922 			    &mask->enc_key_id, TCA_FLOWER_UNSPEC,
923 			    sizeof(key->enc_key_id)))
924 		goto nla_put_failure;
925 
926 	nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags);
927 
928 	if (tcf_exts_dump(skb, &f->exts))
929 		goto nla_put_failure;
930 
931 	nla_nest_end(skb, nest);
932 
933 	if (tcf_exts_dump_stats(skb, &f->exts) < 0)
934 		goto nla_put_failure;
935 
936 	return skb->len;
937 
938 nla_put_failure:
939 	nla_nest_cancel(skb, nest);
940 	return -1;
941 }
942 
/* Operations table registering this file's callbacks as the "flower" classifier. */
static struct tcf_proto_ops cls_fl_ops __read_mostly = {
	.kind		= "flower",
	.classify	= fl_classify,
	.init		= fl_init,
	.destroy	= fl_destroy,
	.get		= fl_get,
	.change		= fl_change,
	.delete		= fl_delete,
	.walk		= fl_walk,
	.dump		= fl_dump,
	.owner		= THIS_MODULE,
};
955 
956 static int __init cls_fl_init(void)
957 {
958 	return register_tcf_proto_ops(&cls_fl_ops);
959 }
960 
/* Module exit: unregister the flower classifier ops registered in cls_fl_init(). */
static void __exit cls_fl_exit(void)
{
	unregister_tcf_proto_ops(&cls_fl_ops);
}
965 
/* Module entry/exit hooks and metadata. */
module_init(cls_fl_init);
module_exit(cls_fl_exit);

MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>");
MODULE_DESCRIPTION("Flower classifier");
MODULE_LICENSE("GPL v2");
972