xref: /openbmc/linux/net/sched/cls_flower.c (revision 8ee90c5c)
1 /*
2  * net/sched/cls_flower.c		Flower classifier
3  *
4  * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  */
11 
12 #include <linux/kernel.h>
13 #include <linux/init.h>
14 #include <linux/module.h>
15 #include <linux/rhashtable.h>
16 #include <linux/workqueue.h>
17 
18 #include <linux/if_ether.h>
19 #include <linux/in6.h>
20 #include <linux/ip.h>
21 #include <linux/mpls.h>
22 
23 #include <net/sch_generic.h>
24 #include <net/pkt_cls.h>
25 #include <net/ip.h>
26 #include <net/flow_dissector.h>
27 
28 #include <net/dst.h>
29 #include <net/dst_metadata.h>
30 
/*
 * Flat key the flower classifier matches on. The same layout is used for
 * the key supplied with a filter, for the mask, and for the key built from
 * each packet in fl_classify(). The helpers in this file address it by
 * byte offset and walk it one long at a time, so member order and the
 * trailing alignment are part of the contract.
 */
31 struct fl_flow_key {
32 	int	indev_ifindex;	/* filled from skb->skb_iif in fl_classify() */
33 	struct flow_dissector_key_control control;
34 	struct flow_dissector_key_control enc_control;
35 	struct flow_dissector_key_basic basic;
36 	struct flow_dissector_key_eth_addrs eth;
37 	struct flow_dissector_key_vlan vlan;
38 	union {
39 		struct flow_dissector_key_ipv4_addrs ipv4;
40 		struct flow_dissector_key_ipv6_addrs ipv6;
41 	};
42 	struct flow_dissector_key_ports tp;
43 	struct flow_dissector_key_icmp icmp;
44 	struct flow_dissector_key_arp arp;
	/* The enc_* members are filled from the skb's tunnel info in fl_classify(). */
45 	struct flow_dissector_key_keyid enc_key_id;
46 	union {
47 		struct flow_dissector_key_ipv4_addrs enc_ipv4;
48 		struct flow_dissector_key_ipv6_addrs enc_ipv6;
49 	};
50 	struct flow_dissector_key_ports enc_tp;
51 	struct flow_dissector_key_mpls mpls;
52 	struct flow_dissector_key_tcp tcp;
53 	struct flow_dissector_key_ip ip;
54 } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
55 
/*
 * Byte offsets into struct fl_flow_key that bound the part of the key a
 * mask covers. fl_mask_update_range() computes both and rounds them to a
 * multiple of sizeof(long).
 */
56 struct fl_flow_mask_range {
57 	unsigned short int start;	/* offset of the first covered byte */
58 	unsigned short int end;		/* offset one past the last covered byte */
59 };
60 
/* A match mask together with the byte range of the key that it touches. */
61 struct fl_flow_mask {
62 	struct fl_flow_key key;		/* mask bits, same layout as a match key */
63 	struct fl_flow_mask_range range;	/* see fl_mask_update_range() */
64 	struct rcu_head	rcu;
65 };
66 
/*
 * Per-tcf_proto classifier state. Allocated in fl_init() and published
 * through tp->root; torn down by fl_destroy().
 */
67 struct cls_fl_head {
68 	struct rhashtable ht;		/* filters, hashed on the masked range of mkey */
69 	struct fl_flow_mask mask;	/* the single mask every filter must use */
70 	struct flow_dissector dissector;	/* set up by fl_init_dissector() */
71 	bool mask_assigned;		/* set once mask and ht have been initialised */
72 	struct list_head filters;	/* all filters, linked via cls_fl_filter.list */
73 	struct rhashtable_params ht_params;	/* filled by fl_init_hashtable() */
	/* rcu is used first (call_rcu in fl_destroy()), then the storage is
	 * reused as a work item by fl_destroy_rcu().
	 */
74 	union {
75 		struct work_struct work;
76 		struct rcu_head	rcu;
77 	};
78 	struct idr handle_idr;		/* handle -> filter, looked up by fl_get() */
79 };
80 
/* One installed flower filter. */
81 struct cls_fl_filter {
82 	struct rhash_head ht_node;	/* linkage in cls_fl_head.ht */
83 	struct fl_flow_key mkey;	/* key & mask; this is what gets hashed */
84 	struct tcf_exts exts;
85 	struct tcf_result res;		/* copied to the caller on a match */
86 	struct fl_flow_key key;		/* key as parsed by fl_set_key(), unmasked */
87 	struct list_head list;		/* linkage in cls_fl_head.filters */
88 	u32 handle;
89 	u32 flags;			/* tested with tc_skip_sw()/tc_skip_hw()/tc_in_hw() */
	/* rcu is used by call_rcu() in __fl_delete(); fl_destroy_filter() then
	 * reuses the storage as a work item.
	 */
90 	union {
91 		struct work_struct work;
92 		struct rcu_head	rcu;
93 	};
94 	struct net_device *hw_dev;	/* device chosen in fl_hw_replace_filter() */
95 };
96 
97 static unsigned short int fl_mask_range(const struct fl_flow_mask *mask)
98 {
99 	return mask->range.end - mask->range.start;
100 }
101 
102 static void fl_mask_update_range(struct fl_flow_mask *mask)
103 {
104 	const u8 *bytes = (const u8 *) &mask->key;
105 	size_t size = sizeof(mask->key);
106 	size_t i, first = 0, last = size - 1;
107 
108 	for (i = 0; i < sizeof(mask->key); i++) {
109 		if (bytes[i]) {
110 			if (!first && i)
111 				first = i;
112 			last = i;
113 		}
114 	}
115 	mask->range.start = rounddown(first, sizeof(long));
116 	mask->range.end = roundup(last + 1, sizeof(long));
117 }
118 
119 static void *fl_key_get_start(struct fl_flow_key *key,
120 			      const struct fl_flow_mask *mask)
121 {
122 	return (u8 *) key + mask->range.start;
123 }
124 
125 static void fl_set_masked_key(struct fl_flow_key *mkey, struct fl_flow_key *key,
126 			      struct fl_flow_mask *mask)
127 {
128 	const long *lkey = fl_key_get_start(key, mask);
129 	const long *lmask = fl_key_get_start(&mask->key, mask);
130 	long *lmkey = fl_key_get_start(mkey, mask);
131 	int i;
132 
133 	for (i = 0; i < fl_mask_range(mask); i += sizeof(long))
134 		*lmkey++ = *lkey++ & *lmask++;
135 }
136 
/* Zero the bytes of @key that fall inside @mask's range. */
static void fl_clear_masked_range(struct fl_flow_key *key,
				  struct fl_flow_mask *mask)
{
	void *start = fl_key_get_start(key, mask);
	unsigned short int len = fl_mask_range(mask);

	memset(start, 0, len);
}
142 
143 static struct cls_fl_filter *fl_lookup(struct cls_fl_head *head,
144 				       struct fl_flow_key *mkey)
145 {
146 	return rhashtable_lookup_fast(&head->ht,
147 				      fl_key_get_start(mkey, &head->mask),
148 				      head->ht_params);
149 }
150 
151 static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
152 		       struct tcf_result *res)
153 {
154 	struct cls_fl_head *head = rcu_dereference_bh(tp->root);
155 	struct cls_fl_filter *f;
156 	struct fl_flow_key skb_key;
157 	struct fl_flow_key skb_mkey;
158 	struct ip_tunnel_info *info;
159 
160 	if (!atomic_read(&head->ht.nelems))
161 		return -1;
162 
163 	fl_clear_masked_range(&skb_key, &head->mask);
164 
165 	info = skb_tunnel_info(skb);
166 	if (info) {
167 		struct ip_tunnel_key *key = &info->key;
168 
169 		switch (ip_tunnel_info_af(info)) {
170 		case AF_INET:
171 			skb_key.enc_control.addr_type =
172 				FLOW_DISSECTOR_KEY_IPV4_ADDRS;
173 			skb_key.enc_ipv4.src = key->u.ipv4.src;
174 			skb_key.enc_ipv4.dst = key->u.ipv4.dst;
175 			break;
176 		case AF_INET6:
177 			skb_key.enc_control.addr_type =
178 				FLOW_DISSECTOR_KEY_IPV6_ADDRS;
179 			skb_key.enc_ipv6.src = key->u.ipv6.src;
180 			skb_key.enc_ipv6.dst = key->u.ipv6.dst;
181 			break;
182 		}
183 
184 		skb_key.enc_key_id.keyid = tunnel_id_to_key32(key->tun_id);
185 		skb_key.enc_tp.src = key->tp_src;
186 		skb_key.enc_tp.dst = key->tp_dst;
187 	}
188 
189 	skb_key.indev_ifindex = skb->skb_iif;
190 	/* skb_flow_dissect() does not set n_proto in case an unknown protocol,
191 	 * so do it rather here.
192 	 */
193 	skb_key.basic.n_proto = skb->protocol;
194 	skb_flow_dissect(skb, &head->dissector, &skb_key, 0);
195 
196 	fl_set_masked_key(&skb_mkey, &skb_key, &head->mask);
197 
198 	f = fl_lookup(head, &skb_mkey);
199 	if (f && !tc_skip_sw(f->flags)) {
200 		*res = f->res;
201 		return tcf_exts_exec(skb, &f->exts, res);
202 	}
203 	return -1;
204 }
205 
206 static int fl_init(struct tcf_proto *tp)
207 {
208 	struct cls_fl_head *head;
209 
210 	head = kzalloc(sizeof(*head), GFP_KERNEL);
211 	if (!head)
212 		return -ENOBUFS;
213 
214 	INIT_LIST_HEAD_RCU(&head->filters);
215 	rcu_assign_pointer(tp->root, head);
216 	idr_init(&head->handle_idr);
217 
218 	return 0;
219 }
220 
221 static void fl_destroy_filter_work(struct work_struct *work)
222 {
223 	struct cls_fl_filter *f = container_of(work, struct cls_fl_filter, work);
224 
225 	rtnl_lock();
226 	tcf_exts_destroy(&f->exts);
227 	kfree(f);
228 	rtnl_unlock();
229 }
230 
231 static void fl_destroy_filter(struct rcu_head *head)
232 {
233 	struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu);
234 
235 	INIT_WORK(&f->work, fl_destroy_filter_work);
236 	tcf_queue_work(&f->work);
237 }
238 
239 static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f)
240 {
241 	struct tc_cls_flower_offload cls_flower = {};
242 	struct net_device *dev = f->hw_dev;
243 
244 	if (!tc_can_offload(dev))
245 		return;
246 
247 	tc_cls_common_offload_init(&cls_flower.common, tp);
248 	cls_flower.command = TC_CLSFLOWER_DESTROY;
249 	cls_flower.cookie = (unsigned long) f;
250 	cls_flower.egress_dev = f->hw_dev != tp->q->dev_queue->dev;
251 
252 	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, &cls_flower);
253 }
254 
255 static int fl_hw_replace_filter(struct tcf_proto *tp,
256 				struct flow_dissector *dissector,
257 				struct fl_flow_key *mask,
258 				struct cls_fl_filter *f)
259 {
260 	struct net_device *dev = tp->q->dev_queue->dev;
261 	struct tc_cls_flower_offload cls_flower = {};
262 	int err;
263 
264 	if (!tc_can_offload(dev)) {
265 		if (tcf_exts_get_dev(dev, &f->exts, &f->hw_dev) ||
266 		    (f->hw_dev && !tc_can_offload(f->hw_dev))) {
267 			f->hw_dev = dev;
268 			return tc_skip_sw(f->flags) ? -EINVAL : 0;
269 		}
270 		dev = f->hw_dev;
271 		cls_flower.egress_dev = true;
272 	} else {
273 		f->hw_dev = dev;
274 	}
275 
276 	tc_cls_common_offload_init(&cls_flower.common, tp);
277 	cls_flower.command = TC_CLSFLOWER_REPLACE;
278 	cls_flower.cookie = (unsigned long) f;
279 	cls_flower.dissector = dissector;
280 	cls_flower.mask = mask;
281 	cls_flower.key = &f->mkey;
282 	cls_flower.exts = &f->exts;
283 
284 	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER,
285 					    &cls_flower);
286 	if (!err)
287 		f->flags |= TCA_CLS_FLAGS_IN_HW;
288 
289 	if (tc_skip_sw(f->flags))
290 		return err;
291 	return 0;
292 }
293 
294 static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
295 {
296 	struct tc_cls_flower_offload cls_flower = {};
297 	struct net_device *dev = f->hw_dev;
298 
299 	if (!tc_can_offload(dev))
300 		return;
301 
302 	tc_cls_common_offload_init(&cls_flower.common, tp);
303 	cls_flower.command = TC_CLSFLOWER_STATS;
304 	cls_flower.cookie = (unsigned long) f;
305 	cls_flower.exts = &f->exts;
306 	cls_flower.egress_dev = f->hw_dev != tp->q->dev_queue->dev;
307 
308 	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER,
309 				      &cls_flower);
310 }
311 
312 static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f)
313 {
314 	struct cls_fl_head *head = rtnl_dereference(tp->root);
315 
316 	idr_remove_ext(&head->handle_idr, f->handle);
317 	list_del_rcu(&f->list);
318 	if (!tc_skip_hw(f->flags))
319 		fl_hw_destroy_filter(tp, f);
320 	tcf_unbind_filter(tp, &f->res);
321 	call_rcu(&f->rcu, fl_destroy_filter);
322 }
323 
324 static void fl_destroy_sleepable(struct work_struct *work)
325 {
326 	struct cls_fl_head *head = container_of(work, struct cls_fl_head,
327 						work);
328 	if (head->mask_assigned)
329 		rhashtable_destroy(&head->ht);
330 	kfree(head);
331 	module_put(THIS_MODULE);
332 }
333 
334 static void fl_destroy_rcu(struct rcu_head *rcu)
335 {
336 	struct cls_fl_head *head = container_of(rcu, struct cls_fl_head, rcu);
337 
338 	INIT_WORK(&head->work, fl_destroy_sleepable);
339 	schedule_work(&head->work);
340 }
341 
342 static void fl_destroy(struct tcf_proto *tp)
343 {
344 	struct cls_fl_head *head = rtnl_dereference(tp->root);
345 	struct cls_fl_filter *f, *next;
346 
347 	list_for_each_entry_safe(f, next, &head->filters, list)
348 		__fl_delete(tp, f);
349 	idr_destroy(&head->handle_idr);
350 
351 	__module_get(THIS_MODULE);
352 	call_rcu(&head->rcu, fl_destroy_rcu);
353 }
354 
355 static void *fl_get(struct tcf_proto *tp, u32 handle)
356 {
357 	struct cls_fl_head *head = rtnl_dereference(tp->root);
358 
359 	return idr_find_ext(&head->handle_idr, handle);
360 }
361 
/*
 * Netlink attribute policy for the TCA_FLOWER_* attributes; fl_change()
 * passes it to nla_parse_nested() when parsing TCA_OPTIONS.
 *
 * Integer attributes give a .type. MAC and IPv6 address attributes give
 * only a .len (ETH_ALEN or sizeof(struct in6_addr)).
 */
362 static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
363 	[TCA_FLOWER_UNSPEC]		= { .type = NLA_UNSPEC },
364 	[TCA_FLOWER_CLASSID]		= { .type = NLA_U32 },
365 	[TCA_FLOWER_INDEV]		= { .type = NLA_STRING,
366 					    .len = IFNAMSIZ },
367 	[TCA_FLOWER_KEY_ETH_DST]	= { .len = ETH_ALEN },
368 	[TCA_FLOWER_KEY_ETH_DST_MASK]	= { .len = ETH_ALEN },
369 	[TCA_FLOWER_KEY_ETH_SRC]	= { .len = ETH_ALEN },
370 	[TCA_FLOWER_KEY_ETH_SRC_MASK]	= { .len = ETH_ALEN },
371 	[TCA_FLOWER_KEY_ETH_TYPE]	= { .type = NLA_U16 },
372 	[TCA_FLOWER_KEY_IP_PROTO]	= { .type = NLA_U8 },
373 	[TCA_FLOWER_KEY_IPV4_SRC]	= { .type = NLA_U32 },
374 	[TCA_FLOWER_KEY_IPV4_SRC_MASK]	= { .type = NLA_U32 },
375 	[TCA_FLOWER_KEY_IPV4_DST]	= { .type = NLA_U32 },
376 	[TCA_FLOWER_KEY_IPV4_DST_MASK]	= { .type = NLA_U32 },
377 	[TCA_FLOWER_KEY_IPV6_SRC]	= { .len = sizeof(struct in6_addr) },
378 	[TCA_FLOWER_KEY_IPV6_SRC_MASK]	= { .len = sizeof(struct in6_addr) },
379 	[TCA_FLOWER_KEY_IPV6_DST]	= { .len = sizeof(struct in6_addr) },
380 	[TCA_FLOWER_KEY_IPV6_DST_MASK]	= { .len = sizeof(struct in6_addr) },
381 	[TCA_FLOWER_KEY_TCP_SRC]	= { .type = NLA_U16 },
382 	[TCA_FLOWER_KEY_TCP_DST]	= { .type = NLA_U16 },
383 	[TCA_FLOWER_KEY_UDP_SRC]	= { .type = NLA_U16 },
384 	[TCA_FLOWER_KEY_UDP_DST]	= { .type = NLA_U16 },
385 	[TCA_FLOWER_KEY_VLAN_ID]	= { .type = NLA_U16 },
386 	[TCA_FLOWER_KEY_VLAN_PRIO]	= { .type = NLA_U8 },
387 	[TCA_FLOWER_KEY_VLAN_ETH_TYPE]	= { .type = NLA_U16 },
388 	[TCA_FLOWER_KEY_ENC_KEY_ID]	= { .type = NLA_U32 },
389 	[TCA_FLOWER_KEY_ENC_IPV4_SRC]	= { .type = NLA_U32 },
390 	[TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK] = { .type = NLA_U32 },
391 	[TCA_FLOWER_KEY_ENC_IPV4_DST]	= { .type = NLA_U32 },
392 	[TCA_FLOWER_KEY_ENC_IPV4_DST_MASK] = { .type = NLA_U32 },
393 	[TCA_FLOWER_KEY_ENC_IPV6_SRC]	= { .len = sizeof(struct in6_addr) },
394 	[TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK] = { .len = sizeof(struct in6_addr) },
395 	[TCA_FLOWER_KEY_ENC_IPV6_DST]	= { .len = sizeof(struct in6_addr) },
396 	[TCA_FLOWER_KEY_ENC_IPV6_DST_MASK] = { .len = sizeof(struct in6_addr) },
397 	[TCA_FLOWER_KEY_TCP_SRC_MASK]	= { .type = NLA_U16 },
398 	[TCA_FLOWER_KEY_TCP_DST_MASK]	= { .type = NLA_U16 },
399 	[TCA_FLOWER_KEY_UDP_SRC_MASK]	= { .type = NLA_U16 },
400 	[TCA_FLOWER_KEY_UDP_DST_MASK]	= { .type = NLA_U16 },
401 	[TCA_FLOWER_KEY_SCTP_SRC_MASK]	= { .type = NLA_U16 },
402 	[TCA_FLOWER_KEY_SCTP_DST_MASK]	= { .type = NLA_U16 },
403 	[TCA_FLOWER_KEY_SCTP_SRC]	= { .type = NLA_U16 },
404 	[TCA_FLOWER_KEY_SCTP_DST]	= { .type = NLA_U16 },
405 	[TCA_FLOWER_KEY_ENC_UDP_SRC_PORT]	= { .type = NLA_U16 },
406 	[TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK]	= { .type = NLA_U16 },
407 	[TCA_FLOWER_KEY_ENC_UDP_DST_PORT]	= { .type = NLA_U16 },
408 	[TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK]	= { .type = NLA_U16 },
409 	[TCA_FLOWER_KEY_FLAGS]		= { .type = NLA_U32 },
410 	[TCA_FLOWER_KEY_FLAGS_MASK]	= { .type = NLA_U32 },
411 	[TCA_FLOWER_KEY_ICMPV4_TYPE]	= { .type = NLA_U8 },
412 	[TCA_FLOWER_KEY_ICMPV4_TYPE_MASK] = { .type = NLA_U8 },
413 	[TCA_FLOWER_KEY_ICMPV4_CODE]	= { .type = NLA_U8 },
414 	[TCA_FLOWER_KEY_ICMPV4_CODE_MASK] = { .type = NLA_U8 },
415 	[TCA_FLOWER_KEY_ICMPV6_TYPE]	= { .type = NLA_U8 },
416 	[TCA_FLOWER_KEY_ICMPV6_TYPE_MASK] = { .type = NLA_U8 },
417 	[TCA_FLOWER_KEY_ICMPV6_CODE]	= { .type = NLA_U8 },
418 	[TCA_FLOWER_KEY_ICMPV6_CODE_MASK] = { .type = NLA_U8 },
419 	[TCA_FLOWER_KEY_ARP_SIP]	= { .type = NLA_U32 },
420 	[TCA_FLOWER_KEY_ARP_SIP_MASK]	= { .type = NLA_U32 },
421 	[TCA_FLOWER_KEY_ARP_TIP]	= { .type = NLA_U32 },
422 	[TCA_FLOWER_KEY_ARP_TIP_MASK]	= { .type = NLA_U32 },
423 	[TCA_FLOWER_KEY_ARP_OP]		= { .type = NLA_U8 },
424 	[TCA_FLOWER_KEY_ARP_OP_MASK]	= { .type = NLA_U8 },
425 	[TCA_FLOWER_KEY_ARP_SHA]	= { .len = ETH_ALEN },
426 	[TCA_FLOWER_KEY_ARP_SHA_MASK]	= { .len = ETH_ALEN },
427 	[TCA_FLOWER_KEY_ARP_THA]	= { .len = ETH_ALEN },
428 	[TCA_FLOWER_KEY_ARP_THA_MASK]	= { .len = ETH_ALEN },
429 	[TCA_FLOWER_KEY_MPLS_TTL]	= { .type = NLA_U8 },
430 	[TCA_FLOWER_KEY_MPLS_BOS]	= { .type = NLA_U8 },
431 	[TCA_FLOWER_KEY_MPLS_TC]	= { .type = NLA_U8 },
432 	[TCA_FLOWER_KEY_MPLS_LABEL]	= { .type = NLA_U32 },
433 	[TCA_FLOWER_KEY_TCP_FLAGS]	= { .type = NLA_U16 },
434 	[TCA_FLOWER_KEY_TCP_FLAGS_MASK]	= { .type = NLA_U16 },
435 	[TCA_FLOWER_KEY_IP_TOS]		= { .type = NLA_U8 },
436 	[TCA_FLOWER_KEY_IP_TOS_MASK]	= { .type = NLA_U8 },
437 	[TCA_FLOWER_KEY_IP_TTL]		= { .type = NLA_U8 },
438 	[TCA_FLOWER_KEY_IP_TTL_MASK]	= { .type = NLA_U8 },
439 };
440 
441 static void fl_set_key_val(struct nlattr **tb,
442 			   void *val, int val_type,
443 			   void *mask, int mask_type, int len)
444 {
445 	if (!tb[val_type])
446 		return;
447 	memcpy(val, nla_data(tb[val_type]), len);
448 	if (mask_type == TCA_FLOWER_UNSPEC || !tb[mask_type])
449 		memset(mask, 0xff, len);
450 	else
451 		memcpy(mask, nla_data(tb[mask_type]), len);
452 }
453 
454 static int fl_set_key_mpls(struct nlattr **tb,
455 			   struct flow_dissector_key_mpls *key_val,
456 			   struct flow_dissector_key_mpls *key_mask)
457 {
458 	if (tb[TCA_FLOWER_KEY_MPLS_TTL]) {
459 		key_val->mpls_ttl = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TTL]);
460 		key_mask->mpls_ttl = MPLS_TTL_MASK;
461 	}
462 	if (tb[TCA_FLOWER_KEY_MPLS_BOS]) {
463 		u8 bos = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_BOS]);
464 
465 		if (bos & ~MPLS_BOS_MASK)
466 			return -EINVAL;
467 		key_val->mpls_bos = bos;
468 		key_mask->mpls_bos = MPLS_BOS_MASK;
469 	}
470 	if (tb[TCA_FLOWER_KEY_MPLS_TC]) {
471 		u8 tc = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TC]);
472 
473 		if (tc & ~MPLS_TC_MASK)
474 			return -EINVAL;
475 		key_val->mpls_tc = tc;
476 		key_mask->mpls_tc = MPLS_TC_MASK;
477 	}
478 	if (tb[TCA_FLOWER_KEY_MPLS_LABEL]) {
479 		u32 label = nla_get_u32(tb[TCA_FLOWER_KEY_MPLS_LABEL]);
480 
481 		if (label & ~MPLS_LABEL_MASK)
482 			return -EINVAL;
483 		key_val->mpls_label = label;
484 		key_mask->mpls_label = MPLS_LABEL_MASK;
485 	}
486 	return 0;
487 }
488 
489 static void fl_set_key_vlan(struct nlattr **tb,
490 			    struct flow_dissector_key_vlan *key_val,
491 			    struct flow_dissector_key_vlan *key_mask)
492 {
493 #define VLAN_PRIORITY_MASK	0x7
494 
495 	if (tb[TCA_FLOWER_KEY_VLAN_ID]) {
496 		key_val->vlan_id =
497 			nla_get_u16(tb[TCA_FLOWER_KEY_VLAN_ID]) & VLAN_VID_MASK;
498 		key_mask->vlan_id = VLAN_VID_MASK;
499 	}
500 	if (tb[TCA_FLOWER_KEY_VLAN_PRIO]) {
501 		key_val->vlan_priority =
502 			nla_get_u8(tb[TCA_FLOWER_KEY_VLAN_PRIO]) &
503 			VLAN_PRIORITY_MASK;
504 		key_mask->vlan_priority = VLAN_PRIORITY_MASK;
505 	}
506 }
507 
508 static void fl_set_key_flag(u32 flower_key, u32 flower_mask,
509 			    u32 *dissector_key, u32 *dissector_mask,
510 			    u32 flower_flag_bit, u32 dissector_flag_bit)
511 {
512 	if (flower_mask & flower_flag_bit) {
513 		*dissector_mask |= dissector_flag_bit;
514 		if (flower_key & flower_flag_bit)
515 			*dissector_key |= dissector_flag_bit;
516 	}
517 }
518 
519 static int fl_set_key_flags(struct nlattr **tb,
520 			    u32 *flags_key, u32 *flags_mask)
521 {
522 	u32 key, mask;
523 
524 	/* mask is mandatory for flags */
525 	if (!tb[TCA_FLOWER_KEY_FLAGS_MASK])
526 		return -EINVAL;
527 
528 	key = be32_to_cpu(nla_get_u32(tb[TCA_FLOWER_KEY_FLAGS]));
529 	mask = be32_to_cpu(nla_get_u32(tb[TCA_FLOWER_KEY_FLAGS_MASK]));
530 
531 	*flags_key  = 0;
532 	*flags_mask = 0;
533 
534 	fl_set_key_flag(key, mask, flags_key, flags_mask,
535 			TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT);
536 
537 	return 0;
538 }
539 
540 static void fl_set_key_ip(struct nlattr **tb,
541 			  struct flow_dissector_key_ip *key,
542 			  struct flow_dissector_key_ip *mask)
543 {
544 		fl_set_key_val(tb, &key->tos, TCA_FLOWER_KEY_IP_TOS,
545 			       &mask->tos, TCA_FLOWER_KEY_IP_TOS_MASK,
546 			       sizeof(key->tos));
547 
548 		fl_set_key_val(tb, &key->ttl, TCA_FLOWER_KEY_IP_TTL,
549 			       &mask->ttl, TCA_FLOWER_KEY_IP_TTL_MASK,
550 			       sizeof(key->ttl));
551 }
552 
/*
 * Fill @key and @mask from the parsed TCA_FLOWER_* attributes in @tb.
 *
 * Attributes that are absent leave the corresponding key and mask bytes
 * untouched. Which L3/L4 attributes are consulted depends on the ethertype
 * and IP protocol already stored in @key at that point.
 *
 * Returns 0 on success, or the negative error from tcf_change_indev()
 * (only with CONFIG_NET_CLS_IND), fl_set_key_mpls() or fl_set_key_flags().
 */
553 static int fl_set_key(struct net *net, struct nlattr **tb,
554 		      struct fl_flow_key *key, struct fl_flow_key *mask)
555 {
556 	__be16 ethertype;
557 	int ret = 0;
558 #ifdef CONFIG_NET_CLS_IND
559 	if (tb[TCA_FLOWER_INDEV]) {
		/* A non-negative return is stored as the ifindex to match. */
560 		int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV]);
561 		if (err < 0)
562 			return err;
563 		key->indev_ifindex = err;
564 		mask->indev_ifindex = 0xffffffff;
565 	}
566 #endif
567 
568 	fl_set_key_val(tb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
569 		       mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
570 		       sizeof(key->eth.dst));
571 	fl_set_key_val(tb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
572 		       mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
573 		       sizeof(key->eth.src));
574 
575 	if (tb[TCA_FLOWER_KEY_ETH_TYPE]) {
576 		ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_ETH_TYPE]);
577 
		/* For 802.1Q, n_proto comes from VLAN_ETH_TYPE instead of
		 * from the outer ethertype.
		 */
578 		if (ethertype == htons(ETH_P_8021Q)) {
579 			fl_set_key_vlan(tb, &key->vlan, &mask->vlan);
580 			fl_set_key_val(tb, &key->basic.n_proto,
581 				       TCA_FLOWER_KEY_VLAN_ETH_TYPE,
582 				       &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
583 				       sizeof(key->basic.n_proto));
584 		} else {
585 			key->basic.n_proto = ethertype;
586 			mask->basic.n_proto = cpu_to_be16(~0);
587 		}
588 	}
589 
	/* ip_proto, TOS and TTL are only read for IPv4 and IPv6. */
590 	if (key->basic.n_proto == htons(ETH_P_IP) ||
591 	    key->basic.n_proto == htons(ETH_P_IPV6)) {
592 		fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
593 			       &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
594 			       sizeof(key->basic.ip_proto));
595 		fl_set_key_ip(tb, &key->ip, &mask->ip);
596 	}
597 
	/* IPv4 address attributes take precedence over IPv6 ones. */
598 	if (tb[TCA_FLOWER_KEY_IPV4_SRC] || tb[TCA_FLOWER_KEY_IPV4_DST]) {
599 		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
600 		mask->control.addr_type = ~0;
601 		fl_set_key_val(tb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
602 			       &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
603 			       sizeof(key->ipv4.src));
604 		fl_set_key_val(tb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
605 			       &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
606 			       sizeof(key->ipv4.dst));
607 	} else if (tb[TCA_FLOWER_KEY_IPV6_SRC] || tb[TCA_FLOWER_KEY_IPV6_DST]) {
608 		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
609 		mask->control.addr_type = ~0;
610 		fl_set_key_val(tb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
611 			       &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
612 			       sizeof(key->ipv6.src));
613 		fl_set_key_val(tb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
614 			       &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
615 			       sizeof(key->ipv6.dst));
616 	}
617 
	/* One chain: at most one of TCP, UDP, SCTP, ICMPv4, ICMPv6, MPLS or
	 * ARP/RARP attributes is consumed.
	 */
618 	if (key->basic.ip_proto == IPPROTO_TCP) {
619 		fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
620 			       &mask->tp.src, TCA_FLOWER_KEY_TCP_SRC_MASK,
621 			       sizeof(key->tp.src));
622 		fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
623 			       &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK,
624 			       sizeof(key->tp.dst));
625 		fl_set_key_val(tb, &key->tcp.flags, TCA_FLOWER_KEY_TCP_FLAGS,
626 			       &mask->tcp.flags, TCA_FLOWER_KEY_TCP_FLAGS_MASK,
627 			       sizeof(key->tcp.flags));
628 	} else if (key->basic.ip_proto == IPPROTO_UDP) {
629 		fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
630 			       &mask->tp.src, TCA_FLOWER_KEY_UDP_SRC_MASK,
631 			       sizeof(key->tp.src));
632 		fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
633 			       &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK,
634 			       sizeof(key->tp.dst));
635 	} else if (key->basic.ip_proto == IPPROTO_SCTP) {
636 		fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_SCTP_SRC,
637 			       &mask->tp.src, TCA_FLOWER_KEY_SCTP_SRC_MASK,
638 			       sizeof(key->tp.src));
639 		fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_SCTP_DST,
640 			       &mask->tp.dst, TCA_FLOWER_KEY_SCTP_DST_MASK,
641 			       sizeof(key->tp.dst));
642 	} else if (key->basic.n_proto == htons(ETH_P_IP) &&
643 		   key->basic.ip_proto == IPPROTO_ICMP) {
644 		fl_set_key_val(tb, &key->icmp.type, TCA_FLOWER_KEY_ICMPV4_TYPE,
645 			       &mask->icmp.type,
646 			       TCA_FLOWER_KEY_ICMPV4_TYPE_MASK,
647 			       sizeof(key->icmp.type));
648 		fl_set_key_val(tb, &key->icmp.code, TCA_FLOWER_KEY_ICMPV4_CODE,
649 			       &mask->icmp.code,
650 			       TCA_FLOWER_KEY_ICMPV4_CODE_MASK,
651 			       sizeof(key->icmp.code));
652 	} else if (key->basic.n_proto == htons(ETH_P_IPV6) &&
653 		   key->basic.ip_proto == IPPROTO_ICMPV6) {
654 		fl_set_key_val(tb, &key->icmp.type, TCA_FLOWER_KEY_ICMPV6_TYPE,
655 			       &mask->icmp.type,
656 			       TCA_FLOWER_KEY_ICMPV6_TYPE_MASK,
657 			       sizeof(key->icmp.type));
658 		fl_set_key_val(tb, &key->icmp.code, TCA_FLOWER_KEY_ICMPV6_CODE,
659 			       &mask->icmp.code,
660 			       TCA_FLOWER_KEY_ICMPV6_CODE_MASK,
661 			       sizeof(key->icmp.code));
662 	} else if (key->basic.n_proto == htons(ETH_P_MPLS_UC) ||
663 		   key->basic.n_proto == htons(ETH_P_MPLS_MC)) {
664 		ret = fl_set_key_mpls(tb, &key->mpls, &mask->mpls);
665 		if (ret)
666 			return ret;
667 	} else if (key->basic.n_proto == htons(ETH_P_ARP) ||
668 		   key->basic.n_proto == htons(ETH_P_RARP)) {
669 		fl_set_key_val(tb, &key->arp.sip, TCA_FLOWER_KEY_ARP_SIP,
670 			       &mask->arp.sip, TCA_FLOWER_KEY_ARP_SIP_MASK,
671 			       sizeof(key->arp.sip));
672 		fl_set_key_val(tb, &key->arp.tip, TCA_FLOWER_KEY_ARP_TIP,
673 			       &mask->arp.tip, TCA_FLOWER_KEY_ARP_TIP_MASK,
674 			       sizeof(key->arp.tip));
675 		fl_set_key_val(tb, &key->arp.op, TCA_FLOWER_KEY_ARP_OP,
676 			       &mask->arp.op, TCA_FLOWER_KEY_ARP_OP_MASK,
677 			       sizeof(key->arp.op));
678 		fl_set_key_val(tb, key->arp.sha, TCA_FLOWER_KEY_ARP_SHA,
679 			       mask->arp.sha, TCA_FLOWER_KEY_ARP_SHA_MASK,
680 			       sizeof(key->arp.sha));
681 		fl_set_key_val(tb, key->arp.tha, TCA_FLOWER_KEY_ARP_THA,
682 			       mask->arp.tha, TCA_FLOWER_KEY_ARP_THA_MASK,
683 			       sizeof(key->arp.tha));
684 	}
685 
	/* Tunnel (enc_*) attributes. Unlike the inner addresses above, the
	 * IPv4 and IPv6 branches are independent ifs, not an if/else chain.
	 */
686 	if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] ||
687 	    tb[TCA_FLOWER_KEY_ENC_IPV4_DST]) {
688 		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
689 		mask->enc_control.addr_type = ~0;
690 		fl_set_key_val(tb, &key->enc_ipv4.src,
691 			       TCA_FLOWER_KEY_ENC_IPV4_SRC,
692 			       &mask->enc_ipv4.src,
693 			       TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,
694 			       sizeof(key->enc_ipv4.src));
695 		fl_set_key_val(tb, &key->enc_ipv4.dst,
696 			       TCA_FLOWER_KEY_ENC_IPV4_DST,
697 			       &mask->enc_ipv4.dst,
698 			       TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,
699 			       sizeof(key->enc_ipv4.dst));
700 	}
701 
702 	if (tb[TCA_FLOWER_KEY_ENC_IPV6_SRC] ||
703 	    tb[TCA_FLOWER_KEY_ENC_IPV6_DST]) {
704 		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
705 		mask->enc_control.addr_type = ~0;
706 		fl_set_key_val(tb, &key->enc_ipv6.src,
707 			       TCA_FLOWER_KEY_ENC_IPV6_SRC,
708 			       &mask->enc_ipv6.src,
709 			       TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,
710 			       sizeof(key->enc_ipv6.src));
711 		fl_set_key_val(tb, &key->enc_ipv6.dst,
712 			       TCA_FLOWER_KEY_ENC_IPV6_DST,
713 			       &mask->enc_ipv6.dst,
714 			       TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,
715 			       sizeof(key->enc_ipv6.dst));
716 	}
717 
718 	fl_set_key_val(tb, &key->enc_key_id.keyid, TCA_FLOWER_KEY_ENC_KEY_ID,
719 		       &mask->enc_key_id.keyid, TCA_FLOWER_UNSPEC,
720 		       sizeof(key->enc_key_id.keyid));
721 
722 	fl_set_key_val(tb, &key->enc_tp.src, TCA_FLOWER_KEY_ENC_UDP_SRC_PORT,
723 		       &mask->enc_tp.src, TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK,
724 		       sizeof(key->enc_tp.src));
725 
726 	fl_set_key_val(tb, &key->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT,
727 		       &mask->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,
728 		       sizeof(key->enc_tp.dst));
729 
730 	if (tb[TCA_FLOWER_KEY_FLAGS])
731 		ret = fl_set_key_flags(tb, &key->control.flags, &mask->control.flags);
732 
733 	return ret;
734 }
735 
736 static bool fl_mask_eq(struct fl_flow_mask *mask1,
737 		       struct fl_flow_mask *mask2)
738 {
739 	const long *lmask1 = fl_key_get_start(&mask1->key, mask1);
740 	const long *lmask2 = fl_key_get_start(&mask2->key, mask2);
741 
742 	return !memcmp(&mask1->range, &mask2->range, sizeof(mask1->range)) &&
743 	       !memcmp(lmask1, lmask2, fl_mask_range(mask1));
744 }
745 
/*
 * Template hash table parameters. fl_init_hashtable() copies this into the
 * head, sets key_len from the mask range, and adds the range start to
 * key_offset so that only the masked part of mkey is hashed.
 */
746 static const struct rhashtable_params fl_ht_params = {
747 	.key_offset = offsetof(struct cls_fl_filter, mkey), /* base offset */
748 	.head_offset = offsetof(struct cls_fl_filter, ht_node),
749 	.automatic_shrinking = true,
750 };
751 
752 static int fl_init_hashtable(struct cls_fl_head *head,
753 			     struct fl_flow_mask *mask)
754 {
755 	head->ht_params = fl_ht_params;
756 	head->ht_params.key_len = fl_mask_range(mask);
757 	head->ht_params.key_offset += mask->range.start;
758 
759 	return rhashtable_init(&head->ht, &head->ht_params);
760 }
761 
/* Byte offset and size of @member inside struct fl_flow_key. */
#define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member)
#define FL_KEY_MEMBER_SIZE(member) (sizeof(((struct fl_flow_key *) 0)->member))

/*
 * Used as a truth value: true when at least one byte of @member is set in
 * the struct fl_flow_key that @mask points to.
 */
#define FL_KEY_IS_MASKED(mask, member)						\
	(memchr_inv(((char *)(mask)) + FL_KEY_MEMBER_OFFSET(member),		\
		    0, FL_KEY_MEMBER_SIZE(member)))

/*
 * Append dissector key @id, located at @member of struct fl_flow_key, to
 * the array @keys and increment @cnt.
 *
 * No trailing semicolon after while (0): the caller supplies it, so the
 * macro behaves as a single statement, including before an else.
 */
#define FL_KEY_SET(keys, cnt, id, member)					\
	do {									\
		(keys)[cnt].key_id = (id);					\
		(keys)[cnt].offset = FL_KEY_MEMBER_OFFSET(member);		\
		(cnt)++;							\
	} while (0)

/* FL_KEY_SET(), but only if @member has any bit set in @mask. */
#define FL_KEY_SET_IF_MASKED(mask, keys, cnt, id, member)			\
	do {									\
		if (FL_KEY_IS_MASKED(mask, member))				\
			FL_KEY_SET(keys, cnt, id, member);			\
	} while (0)
781 
782 static void fl_init_dissector(struct cls_fl_head *head,
783 			      struct fl_flow_mask *mask)
784 {
785 	struct flow_dissector_key keys[FLOW_DISSECTOR_KEY_MAX];
786 	size_t cnt = 0;
787 
788 	FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_CONTROL, control);
789 	FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic);
790 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
791 			     FLOW_DISSECTOR_KEY_ETH_ADDRS, eth);
792 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
793 			     FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
794 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
795 			     FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
796 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
797 			     FLOW_DISSECTOR_KEY_PORTS, tp);
798 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
799 			     FLOW_DISSECTOR_KEY_IP, ip);
800 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
801 			     FLOW_DISSECTOR_KEY_TCP, tcp);
802 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
803 			     FLOW_DISSECTOR_KEY_ICMP, icmp);
804 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
805 			     FLOW_DISSECTOR_KEY_ARP, arp);
806 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
807 			     FLOW_DISSECTOR_KEY_MPLS, mpls);
808 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
809 			     FLOW_DISSECTOR_KEY_VLAN, vlan);
810 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
811 			     FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
812 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
813 			     FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, enc_ipv4);
814 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
815 			     FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, enc_ipv6);
816 	if (FL_KEY_IS_MASKED(&mask->key, enc_ipv4) ||
817 	    FL_KEY_IS_MASKED(&mask->key, enc_ipv6))
818 		FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_ENC_CONTROL,
819 			   enc_control);
820 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
821 			     FLOW_DISSECTOR_KEY_ENC_PORTS, enc_tp);
822 
823 	skb_flow_dissector_init(&head->dissector, keys, cnt);
824 }
825 
826 static int fl_check_assign_mask(struct cls_fl_head *head,
827 				struct fl_flow_mask *mask)
828 {
829 	int err;
830 
831 	if (head->mask_assigned) {
832 		if (!fl_mask_eq(&head->mask, mask))
833 			return -EINVAL;
834 		else
835 			return 0;
836 	}
837 
838 	/* Mask is not assigned yet. So assign it and init hashtable
839 	 * according to that.
840 	 */
841 	err = fl_init_hashtable(head, mask);
842 	if (err)
843 		return err;
844 	memcpy(&head->mask, mask, sizeof(head->mask));
845 	head->mask_assigned = true;
846 
847 	fl_init_dissector(head, mask);
848 
849 	return 0;
850 }
851 
852 static int fl_set_parms(struct net *net, struct tcf_proto *tp,
853 			struct cls_fl_filter *f, struct fl_flow_mask *mask,
854 			unsigned long base, struct nlattr **tb,
855 			struct nlattr *est, bool ovr)
856 {
857 	int err;
858 
859 	err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr);
860 	if (err < 0)
861 		return err;
862 
863 	if (tb[TCA_FLOWER_CLASSID]) {
864 		f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
865 		tcf_bind_filter(tp, &f->res, base);
866 	}
867 
868 	err = fl_set_key(net, tb, &f->key, &mask->key);
869 	if (err)
870 		return err;
871 
872 	fl_mask_update_range(mask);
873 	fl_set_masked_key(&f->mkey, &f->key, mask);
874 
875 	return 0;
876 }
877 
878 static int fl_change(struct net *net, struct sk_buff *in_skb,
879 		     struct tcf_proto *tp, unsigned long base,
880 		     u32 handle, struct nlattr **tca,
881 		     void **arg, bool ovr)
882 {
883 	struct cls_fl_head *head = rtnl_dereference(tp->root);
884 	struct cls_fl_filter *fold = *arg;
885 	struct cls_fl_filter *fnew;
886 	struct nlattr **tb;
887 	struct fl_flow_mask mask = {};
888 	unsigned long idr_index;
889 	int err;
890 
891 	if (!tca[TCA_OPTIONS])
892 		return -EINVAL;
893 
894 	tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL);
895 	if (!tb)
896 		return -ENOBUFS;
897 
898 	err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS],
899 			       fl_policy, NULL);
900 	if (err < 0)
901 		goto errout_tb;
902 
903 	if (fold && handle && fold->handle != handle) {
904 		err = -EINVAL;
905 		goto errout_tb;
906 	}
907 
908 	fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
909 	if (!fnew) {
910 		err = -ENOBUFS;
911 		goto errout_tb;
912 	}
913 
914 	err = tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);
915 	if (err < 0)
916 		goto errout;
917 
918 	if (!handle) {
919 		err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index,
920 				    1, 0x80000000, GFP_KERNEL);
921 		if (err)
922 			goto errout;
923 		fnew->handle = idr_index;
924 	}
925 
926 	/* user specifies a handle and it doesn't exist */
927 	if (handle && !fold) {
928 		err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index,
929 				    handle, handle + 1, GFP_KERNEL);
930 		if (err)
931 			goto errout;
932 		fnew->handle = idr_index;
933 	}
934 
935 	if (tb[TCA_FLOWER_FLAGS]) {
936 		fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]);
937 
938 		if (!tc_flags_valid(fnew->flags)) {
939 			err = -EINVAL;
940 			goto errout_idr;
941 		}
942 	}
943 
944 	err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr);
945 	if (err)
946 		goto errout_idr;
947 
948 	err = fl_check_assign_mask(head, &mask);
949 	if (err)
950 		goto errout_idr;
951 
952 	if (!tc_skip_sw(fnew->flags)) {
953 		if (!fold && fl_lookup(head, &fnew->mkey)) {
954 			err = -EEXIST;
955 			goto errout_idr;
956 		}
957 
958 		err = rhashtable_insert_fast(&head->ht, &fnew->ht_node,
959 					     head->ht_params);
960 		if (err)
961 			goto errout_idr;
962 	}
963 
964 	if (!tc_skip_hw(fnew->flags)) {
965 		err = fl_hw_replace_filter(tp,
966 					   &head->dissector,
967 					   &mask.key,
968 					   fnew);
969 		if (err)
970 			goto errout_idr;
971 	}
972 
973 	if (!tc_in_hw(fnew->flags))
974 		fnew->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
975 
976 	if (fold) {
977 		if (!tc_skip_sw(fold->flags))
978 			rhashtable_remove_fast(&head->ht, &fold->ht_node,
979 					       head->ht_params);
980 		if (!tc_skip_hw(fold->flags))
981 			fl_hw_destroy_filter(tp, fold);
982 	}
983 
984 	*arg = fnew;
985 
986 	if (fold) {
987 		fnew->handle = handle;
988 		idr_replace_ext(&head->handle_idr, fnew, fnew->handle);
989 		list_replace_rcu(&fold->list, &fnew->list);
990 		tcf_unbind_filter(tp, &fold->res);
991 		call_rcu(&fold->rcu, fl_destroy_filter);
992 	} else {
993 		list_add_tail_rcu(&fnew->list, &head->filters);
994 	}
995 
996 	kfree(tb);
997 	return 0;
998 
999 errout_idr:
1000 	if (fnew->handle)
1001 		idr_remove_ext(&head->handle_idr, fnew->handle);
1002 errout:
1003 	tcf_exts_destroy(&fnew->exts);
1004 	kfree(fnew);
1005 errout_tb:
1006 	kfree(tb);
1007 	return err;
1008 }
1009 
1010 static int fl_delete(struct tcf_proto *tp, void *arg, bool *last)
1011 {
1012 	struct cls_fl_head *head = rtnl_dereference(tp->root);
1013 	struct cls_fl_filter *f = arg;
1014 
1015 	if (!tc_skip_sw(f->flags))
1016 		rhashtable_remove_fast(&head->ht, &f->ht_node,
1017 				       head->ht_params);
1018 	__fl_delete(tp, f);
1019 	*last = list_empty(&head->filters);
1020 	return 0;
1021 }
1022 
1023 static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
1024 {
1025 	struct cls_fl_head *head = rtnl_dereference(tp->root);
1026 	struct cls_fl_filter *f;
1027 
1028 	list_for_each_entry_rcu(f, &head->filters, list) {
1029 		if (arg->count < arg->skip)
1030 			goto skip;
1031 		if (arg->fn(tp, f, arg) < 0) {
1032 			arg->stop = 1;
1033 			break;
1034 		}
1035 skip:
1036 		arg->count++;
1037 	}
1038 }
1039 
1040 static int fl_dump_key_val(struct sk_buff *skb,
1041 			   void *val, int val_type,
1042 			   void *mask, int mask_type, int len)
1043 {
1044 	int err;
1045 
1046 	if (!memchr_inv(mask, 0, len))
1047 		return 0;
1048 	err = nla_put(skb, val_type, len, val);
1049 	if (err)
1050 		return err;
1051 	if (mask_type != TCA_FLOWER_UNSPEC) {
1052 		err = nla_put(skb, mask_type, len, mask);
1053 		if (err)
1054 			return err;
1055 	}
1056 	return 0;
1057 }
1058 
1059 static int fl_dump_key_mpls(struct sk_buff *skb,
1060 			    struct flow_dissector_key_mpls *mpls_key,
1061 			    struct flow_dissector_key_mpls *mpls_mask)
1062 {
1063 	int err;
1064 
1065 	if (!memchr_inv(mpls_mask, 0, sizeof(*mpls_mask)))
1066 		return 0;
1067 	if (mpls_mask->mpls_ttl) {
1068 		err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_TTL,
1069 				 mpls_key->mpls_ttl);
1070 		if (err)
1071 			return err;
1072 	}
1073 	if (mpls_mask->mpls_tc) {
1074 		err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_TC,
1075 				 mpls_key->mpls_tc);
1076 		if (err)
1077 			return err;
1078 	}
1079 	if (mpls_mask->mpls_label) {
1080 		err = nla_put_u32(skb, TCA_FLOWER_KEY_MPLS_LABEL,
1081 				  mpls_key->mpls_label);
1082 		if (err)
1083 			return err;
1084 	}
1085 	if (mpls_mask->mpls_bos) {
1086 		err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_BOS,
1087 				 mpls_key->mpls_bos);
1088 		if (err)
1089 			return err;
1090 	}
1091 	return 0;
1092 }
1093 
1094 static int fl_dump_key_ip(struct sk_buff *skb,
1095 			  struct flow_dissector_key_ip *key,
1096 			  struct flow_dissector_key_ip *mask)
1097 {
1098 	if (fl_dump_key_val(skb, &key->tos, TCA_FLOWER_KEY_IP_TOS, &mask->tos,
1099 			    TCA_FLOWER_KEY_IP_TOS_MASK, sizeof(key->tos)) ||
1100 	    fl_dump_key_val(skb, &key->ttl, TCA_FLOWER_KEY_IP_TTL, &mask->ttl,
1101 			    TCA_FLOWER_KEY_IP_TTL_MASK, sizeof(key->ttl)))
1102 		return -1;
1103 
1104 	return 0;
1105 }
1106 
1107 static int fl_dump_key_vlan(struct sk_buff *skb,
1108 			    struct flow_dissector_key_vlan *vlan_key,
1109 			    struct flow_dissector_key_vlan *vlan_mask)
1110 {
1111 	int err;
1112 
1113 	if (!memchr_inv(vlan_mask, 0, sizeof(*vlan_mask)))
1114 		return 0;
1115 	if (vlan_mask->vlan_id) {
1116 		err = nla_put_u16(skb, TCA_FLOWER_KEY_VLAN_ID,
1117 				  vlan_key->vlan_id);
1118 		if (err)
1119 			return err;
1120 	}
1121 	if (vlan_mask->vlan_priority) {
1122 		err = nla_put_u8(skb, TCA_FLOWER_KEY_VLAN_PRIO,
1123 				 vlan_key->vlan_priority);
1124 		if (err)
1125 			return err;
1126 	}
1127 	return 0;
1128 }
1129 
1130 static void fl_get_key_flag(u32 dissector_key, u32 dissector_mask,
1131 			    u32 *flower_key, u32 *flower_mask,
1132 			    u32 flower_flag_bit, u32 dissector_flag_bit)
1133 {
1134 	if (dissector_mask & dissector_flag_bit) {
1135 		*flower_mask |= flower_flag_bit;
1136 		if (dissector_key & dissector_flag_bit)
1137 			*flower_key |= flower_flag_bit;
1138 	}
1139 }
1140 
1141 static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask)
1142 {
1143 	u32 key, mask;
1144 	__be32 _key, _mask;
1145 	int err;
1146 
1147 	if (!memchr_inv(&flags_mask, 0, sizeof(flags_mask)))
1148 		return 0;
1149 
1150 	key = 0;
1151 	mask = 0;
1152 
1153 	fl_get_key_flag(flags_key, flags_mask, &key, &mask,
1154 			TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT);
1155 
1156 	_key = cpu_to_be32(key);
1157 	_mask = cpu_to_be32(mask);
1158 
1159 	err = nla_put(skb, TCA_FLOWER_KEY_FLAGS, 4, &_key);
1160 	if (err)
1161 		return err;
1162 
1163 	return nla_put(skb, TCA_FLOWER_KEY_FLAGS_MASK, 4, &_mask);
1164 }
1165 
1166 static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
1167 		   struct sk_buff *skb, struct tcmsg *t)
1168 {
1169 	struct cls_fl_head *head = rtnl_dereference(tp->root);
1170 	struct cls_fl_filter *f = fh;
1171 	struct nlattr *nest;
1172 	struct fl_flow_key *key, *mask;
1173 
1174 	if (!f)
1175 		return skb->len;
1176 
1177 	t->tcm_handle = f->handle;
1178 
1179 	nest = nla_nest_start(skb, TCA_OPTIONS);
1180 	if (!nest)
1181 		goto nla_put_failure;
1182 
1183 	if (f->res.classid &&
1184 	    nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid))
1185 		goto nla_put_failure;
1186 
1187 	key = &f->key;
1188 	mask = &head->mask.key;
1189 
1190 	if (mask->indev_ifindex) {
1191 		struct net_device *dev;
1192 
1193 		dev = __dev_get_by_index(net, key->indev_ifindex);
1194 		if (dev && nla_put_string(skb, TCA_FLOWER_INDEV, dev->name))
1195 			goto nla_put_failure;
1196 	}
1197 
1198 	if (!tc_skip_hw(f->flags))
1199 		fl_hw_update_stats(tp, f);
1200 
1201 	if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
1202 			    mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
1203 			    sizeof(key->eth.dst)) ||
1204 	    fl_dump_key_val(skb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
1205 			    mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
1206 			    sizeof(key->eth.src)) ||
1207 	    fl_dump_key_val(skb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE,
1208 			    &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
1209 			    sizeof(key->basic.n_proto)))
1210 		goto nla_put_failure;
1211 
1212 	if (fl_dump_key_mpls(skb, &key->mpls, &mask->mpls))
1213 		goto nla_put_failure;
1214 
1215 	if (fl_dump_key_vlan(skb, &key->vlan, &mask->vlan))
1216 		goto nla_put_failure;
1217 
1218 	if ((key->basic.n_proto == htons(ETH_P_IP) ||
1219 	     key->basic.n_proto == htons(ETH_P_IPV6)) &&
1220 	    (fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
1221 			    &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
1222 			    sizeof(key->basic.ip_proto)) ||
1223 	    fl_dump_key_ip(skb, &key->ip, &mask->ip)))
1224 		goto nla_put_failure;
1225 
1226 	if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
1227 	    (fl_dump_key_val(skb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
1228 			     &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
1229 			     sizeof(key->ipv4.src)) ||
1230 	     fl_dump_key_val(skb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
1231 			     &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
1232 			     sizeof(key->ipv4.dst))))
1233 		goto nla_put_failure;
1234 	else if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS &&
1235 		 (fl_dump_key_val(skb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
1236 				  &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
1237 				  sizeof(key->ipv6.src)) ||
1238 		  fl_dump_key_val(skb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
1239 				  &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
1240 				  sizeof(key->ipv6.dst))))
1241 		goto nla_put_failure;
1242 
1243 	if (key->basic.ip_proto == IPPROTO_TCP &&
1244 	    (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
1245 			     &mask->tp.src, TCA_FLOWER_KEY_TCP_SRC_MASK,
1246 			     sizeof(key->tp.src)) ||
1247 	     fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
1248 			     &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK,
1249 			     sizeof(key->tp.dst)) ||
1250 	     fl_dump_key_val(skb, &key->tcp.flags, TCA_FLOWER_KEY_TCP_FLAGS,
1251 			     &mask->tcp.flags, TCA_FLOWER_KEY_TCP_FLAGS_MASK,
1252 			     sizeof(key->tcp.flags))))
1253 		goto nla_put_failure;
1254 	else if (key->basic.ip_proto == IPPROTO_UDP &&
1255 		 (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
1256 				  &mask->tp.src, TCA_FLOWER_KEY_UDP_SRC_MASK,
1257 				  sizeof(key->tp.src)) ||
1258 		  fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
1259 				  &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK,
1260 				  sizeof(key->tp.dst))))
1261 		goto nla_put_failure;
1262 	else if (key->basic.ip_proto == IPPROTO_SCTP &&
1263 		 (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_SCTP_SRC,
1264 				  &mask->tp.src, TCA_FLOWER_KEY_SCTP_SRC_MASK,
1265 				  sizeof(key->tp.src)) ||
1266 		  fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_SCTP_DST,
1267 				  &mask->tp.dst, TCA_FLOWER_KEY_SCTP_DST_MASK,
1268 				  sizeof(key->tp.dst))))
1269 		goto nla_put_failure;
1270 	else if (key->basic.n_proto == htons(ETH_P_IP) &&
1271 		 key->basic.ip_proto == IPPROTO_ICMP &&
1272 		 (fl_dump_key_val(skb, &key->icmp.type,
1273 				  TCA_FLOWER_KEY_ICMPV4_TYPE, &mask->icmp.type,
1274 				  TCA_FLOWER_KEY_ICMPV4_TYPE_MASK,
1275 				  sizeof(key->icmp.type)) ||
1276 		  fl_dump_key_val(skb, &key->icmp.code,
1277 				  TCA_FLOWER_KEY_ICMPV4_CODE, &mask->icmp.code,
1278 				  TCA_FLOWER_KEY_ICMPV4_CODE_MASK,
1279 				  sizeof(key->icmp.code))))
1280 		goto nla_put_failure;
1281 	else if (key->basic.n_proto == htons(ETH_P_IPV6) &&
1282 		 key->basic.ip_proto == IPPROTO_ICMPV6 &&
1283 		 (fl_dump_key_val(skb, &key->icmp.type,
1284 				  TCA_FLOWER_KEY_ICMPV6_TYPE, &mask->icmp.type,
1285 				  TCA_FLOWER_KEY_ICMPV6_TYPE_MASK,
1286 				  sizeof(key->icmp.type)) ||
1287 		  fl_dump_key_val(skb, &key->icmp.code,
1288 				  TCA_FLOWER_KEY_ICMPV6_CODE, &mask->icmp.code,
1289 				  TCA_FLOWER_KEY_ICMPV6_CODE_MASK,
1290 				  sizeof(key->icmp.code))))
1291 		goto nla_put_failure;
1292 	else if ((key->basic.n_proto == htons(ETH_P_ARP) ||
1293 		  key->basic.n_proto == htons(ETH_P_RARP)) &&
1294 		 (fl_dump_key_val(skb, &key->arp.sip,
1295 				  TCA_FLOWER_KEY_ARP_SIP, &mask->arp.sip,
1296 				  TCA_FLOWER_KEY_ARP_SIP_MASK,
1297 				  sizeof(key->arp.sip)) ||
1298 		  fl_dump_key_val(skb, &key->arp.tip,
1299 				  TCA_FLOWER_KEY_ARP_TIP, &mask->arp.tip,
1300 				  TCA_FLOWER_KEY_ARP_TIP_MASK,
1301 				  sizeof(key->arp.tip)) ||
1302 		  fl_dump_key_val(skb, &key->arp.op,
1303 				  TCA_FLOWER_KEY_ARP_OP, &mask->arp.op,
1304 				  TCA_FLOWER_KEY_ARP_OP_MASK,
1305 				  sizeof(key->arp.op)) ||
1306 		  fl_dump_key_val(skb, key->arp.sha, TCA_FLOWER_KEY_ARP_SHA,
1307 				  mask->arp.sha, TCA_FLOWER_KEY_ARP_SHA_MASK,
1308 				  sizeof(key->arp.sha)) ||
1309 		  fl_dump_key_val(skb, key->arp.tha, TCA_FLOWER_KEY_ARP_THA,
1310 				  mask->arp.tha, TCA_FLOWER_KEY_ARP_THA_MASK,
1311 				  sizeof(key->arp.tha))))
1312 		goto nla_put_failure;
1313 
1314 	if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
1315 	    (fl_dump_key_val(skb, &key->enc_ipv4.src,
1316 			    TCA_FLOWER_KEY_ENC_IPV4_SRC, &mask->enc_ipv4.src,
1317 			    TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,
1318 			    sizeof(key->enc_ipv4.src)) ||
1319 	     fl_dump_key_val(skb, &key->enc_ipv4.dst,
1320 			     TCA_FLOWER_KEY_ENC_IPV4_DST, &mask->enc_ipv4.dst,
1321 			     TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,
1322 			     sizeof(key->enc_ipv4.dst))))
1323 		goto nla_put_failure;
1324 	else if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS &&
1325 		 (fl_dump_key_val(skb, &key->enc_ipv6.src,
1326 			    TCA_FLOWER_KEY_ENC_IPV6_SRC, &mask->enc_ipv6.src,
1327 			    TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,
1328 			    sizeof(key->enc_ipv6.src)) ||
1329 		 fl_dump_key_val(skb, &key->enc_ipv6.dst,
1330 				 TCA_FLOWER_KEY_ENC_IPV6_DST,
1331 				 &mask->enc_ipv6.dst,
1332 				 TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,
1333 			    sizeof(key->enc_ipv6.dst))))
1334 		goto nla_put_failure;
1335 
1336 	if (fl_dump_key_val(skb, &key->enc_key_id, TCA_FLOWER_KEY_ENC_KEY_ID,
1337 			    &mask->enc_key_id, TCA_FLOWER_UNSPEC,
1338 			    sizeof(key->enc_key_id)) ||
1339 	    fl_dump_key_val(skb, &key->enc_tp.src,
1340 			    TCA_FLOWER_KEY_ENC_UDP_SRC_PORT,
1341 			    &mask->enc_tp.src,
1342 			    TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK,
1343 			    sizeof(key->enc_tp.src)) ||
1344 	    fl_dump_key_val(skb, &key->enc_tp.dst,
1345 			    TCA_FLOWER_KEY_ENC_UDP_DST_PORT,
1346 			    &mask->enc_tp.dst,
1347 			    TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,
1348 			    sizeof(key->enc_tp.dst)))
1349 		goto nla_put_failure;
1350 
1351 	if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags))
1352 		goto nla_put_failure;
1353 
1354 	if (f->flags && nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags))
1355 		goto nla_put_failure;
1356 
1357 	if (tcf_exts_dump(skb, &f->exts))
1358 		goto nla_put_failure;
1359 
1360 	nla_nest_end(skb, nest);
1361 
1362 	if (tcf_exts_dump_stats(skb, &f->exts) < 0)
1363 		goto nla_put_failure;
1364 
1365 	return skb->len;
1366 
1367 nla_put_failure:
1368 	nla_nest_cancel(skb, nest);
1369 	return -1;
1370 }
1371 
1372 static void fl_bind_class(void *fh, u32 classid, unsigned long cl)
1373 {
1374 	struct cls_fl_filter *f = fh;
1375 
1376 	if (f && f->res.classid == classid)
1377 		f->res.class = cl;
1378 }
1379 
1380 static struct tcf_proto_ops cls_fl_ops __read_mostly = {
1381 	.kind		= "flower",
1382 	.classify	= fl_classify,
1383 	.init		= fl_init,
1384 	.destroy	= fl_destroy,
1385 	.get		= fl_get,
1386 	.change		= fl_change,
1387 	.delete		= fl_delete,
1388 	.walk		= fl_walk,
1389 	.dump		= fl_dump,
1390 	.bind_class	= fl_bind_class,
1391 	.owner		= THIS_MODULE,
1392 };
1393 
1394 static int __init cls_fl_init(void)
1395 {
1396 	return register_tcf_proto_ops(&cls_fl_ops);
1397 }
1398 
1399 static void __exit cls_fl_exit(void)
1400 {
1401 	unregister_tcf_proto_ops(&cls_fl_ops);
1402 }
1403 
1404 module_init(cls_fl_init);
1405 module_exit(cls_fl_exit);
1406 
1407 MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>");
1408 MODULE_DESCRIPTION("Flower classifier");
1409 MODULE_LICENSE("GPL v2");
1410