xref: /openbmc/linux/net/sched/cls_flower.c (revision a86854d0)
1 /*
2  * net/sched/cls_flower.c		Flower classifier
3  *
4  * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  */
11 
12 #include <linux/kernel.h>
13 #include <linux/init.h>
14 #include <linux/module.h>
15 #include <linux/rhashtable.h>
16 #include <linux/workqueue.h>
17 
18 #include <linux/if_ether.h>
19 #include <linux/in6.h>
20 #include <linux/ip.h>
21 #include <linux/mpls.h>
22 
23 #include <net/sch_generic.h>
24 #include <net/pkt_cls.h>
25 #include <net/ip.h>
26 #include <net/flow_dissector.h>
27 
28 #include <net/dst.h>
29 #include <net/dst_metadata.h>
30 
/*
 * Flat match key used by the flower classifier.
 *
 * The same layout serves as filter key, mask and masked key.  Helpers in
 * this file walk the structure as a raw array of bytes/longs (see
 * fl_mask_update_range() and fl_set_masked_key()), so the order of the
 * members decides which byte window a given mask covers.
 */
struct fl_flow_key {
	int	indev_ifindex;		/* ingress device index */
	struct flow_dissector_key_control control;
	struct flow_dissector_key_control enc_control;	/* control for the enc_* members */
	struct flow_dissector_key_basic basic;		/* n_proto / ip_proto */
	struct flow_dissector_key_eth_addrs eth;
	struct flow_dissector_key_vlan vlan;
	/* IPv4 and IPv6 addresses share storage; control.addr_type says which. */
	union {
		struct flow_dissector_key_ipv4_addrs ipv4;
		struct flow_dissector_key_ipv6_addrs ipv6;
	};
	struct flow_dissector_key_ports tp;	/* TCP/UDP/SCTP ports */
	struct flow_dissector_key_icmp icmp;
	struct flow_dissector_key_arp arp;
	struct flow_dissector_key_keyid enc_key_id;
	/* Same sharing for the enc_* addresses; enc_control.addr_type says which. */
	union {
		struct flow_dissector_key_ipv4_addrs enc_ipv4;
		struct flow_dissector_key_ipv6_addrs enc_ipv6;
	};
	struct flow_dissector_key_ports enc_tp;
	struct flow_dissector_key_mpls mpls;
	struct flow_dissector_key_tcp tcp;	/* TCP flags */
	struct flow_dissector_key_ip ip;	/* tos / ttl */
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
55 
/*
 * Byte window inside struct fl_flow_key that a mask actually uses.
 * fl_mask_update_range() fills both offsets in, rounded to sizeof(long).
 */
struct fl_flow_mask_range {
	unsigned short int start;	/* offset of the first byte in the window */
	unsigned short int end;		/* offset one past the last byte in the window */
};
60 
/*
 * One distinct mask together with all filters that use it.
 */
struct fl_flow_mask {
	struct fl_flow_key key;		/* the mask bits; also the hash key in cls_fl_head.ht */
	struct fl_flow_mask_range range; /* byte window of @key that is in use */
	struct rhash_head ht_node;	/* linkage in cls_fl_head.ht (see mask_ht_params) */
	struct rhashtable ht;		/* filters using this mask, looked up by fl_lookup() */
	struct rhashtable_params filter_ht_params; /* per-mask parameters for @ht */
	struct flow_dissector dissector; /* set up by fl_init_dissector() */
	struct list_head filters;	/* cls_fl_filter.list entries */
	struct rcu_head rcu;		/* used by kfree_rcu() in fl_mask_put() */
	struct list_head list;		/* linkage in cls_fl_head.masks */
};
72 
/*
 * Per-tcf_proto root object, stored in tp->root by fl_init().
 */
struct cls_fl_head {
	struct rhashtable ht;		/* masks, hashed by their full key (mask_ht_params) */
	struct list_head masks;		/* all masks; walked by fl_classify() */
	struct rcu_work rwork;		/* deferred teardown, see fl_destroy() */
	struct idr handle_idr;		/* filter handle -> struct cls_fl_filter */
};
79 
/*
 * A single flower filter.
 */
struct cls_fl_filter {
	struct fl_flow_mask *mask;	/* mask this filter is attached to */
	struct rhash_head ht_node;	/* linkage in mask->ht (see fl_ht_params) */
	struct fl_flow_key mkey;	/* key ANDed with the mask; hash key in mask->ht */
	struct tcf_exts exts;		/* executed via tcf_exts_exec() on a match */
	struct tcf_result res;		/* copied to the caller on a match */
	struct fl_flow_key key;		/* unmasked key as parsed from netlink */
	struct list_head list;		/* linkage in mask->filters */
	u32 handle;			/* id in cls_fl_head.handle_idr */
	u32 flags;			/* TCA_FLOWER_FLAGS value (skip_sw / skip_hw / in_hw) */
	struct rcu_work rwork;		/* deferred destruction, see __fl_delete() */
	struct net_device *hw_dev;	/* NOTE(review): not referenced in the visible code */
};
93 
94 static const struct rhashtable_params mask_ht_params = {
95 	.key_offset = offsetof(struct fl_flow_mask, key),
96 	.key_len = sizeof(struct fl_flow_key),
97 	.head_offset = offsetof(struct fl_flow_mask, ht_node),
98 	.automatic_shrinking = true,
99 };
100 
101 static unsigned short int fl_mask_range(const struct fl_flow_mask *mask)
102 {
103 	return mask->range.end - mask->range.start;
104 }
105 
106 static void fl_mask_update_range(struct fl_flow_mask *mask)
107 {
108 	const u8 *bytes = (const u8 *) &mask->key;
109 	size_t size = sizeof(mask->key);
110 	size_t i, first = 0, last;
111 
112 	for (i = 0; i < size; i++) {
113 		if (bytes[i]) {
114 			first = i;
115 			break;
116 		}
117 	}
118 	last = first;
119 	for (i = size - 1; i != first; i--) {
120 		if (bytes[i]) {
121 			last = i;
122 			break;
123 		}
124 	}
125 	mask->range.start = rounddown(first, sizeof(long));
126 	mask->range.end = roundup(last + 1, sizeof(long));
127 }
128 
129 static void *fl_key_get_start(struct fl_flow_key *key,
130 			      const struct fl_flow_mask *mask)
131 {
132 	return (u8 *) key + mask->range.start;
133 }
134 
135 static void fl_set_masked_key(struct fl_flow_key *mkey, struct fl_flow_key *key,
136 			      struct fl_flow_mask *mask)
137 {
138 	const long *lkey = fl_key_get_start(key, mask);
139 	const long *lmask = fl_key_get_start(&mask->key, mask);
140 	long *lmkey = fl_key_get_start(mkey, mask);
141 	int i;
142 
143 	for (i = 0; i < fl_mask_range(mask); i += sizeof(long))
144 		*lmkey++ = *lkey++ & *lmask++;
145 }
146 
/* Zero the part of @key that falls inside the window of @mask. */
static void fl_clear_masked_range(struct fl_flow_key *key,
				  struct fl_flow_mask *mask)
{
	void *window = fl_key_get_start(key, mask);

	memset(window, 0, fl_mask_range(mask));
}
152 
153 static struct cls_fl_filter *fl_lookup(struct fl_flow_mask *mask,
154 				       struct fl_flow_key *mkey)
155 {
156 	return rhashtable_lookup_fast(&mask->ht, fl_key_get_start(mkey, mask),
157 				      mask->filter_ht_params);
158 }
159 
160 static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
161 		       struct tcf_result *res)
162 {
163 	struct cls_fl_head *head = rcu_dereference_bh(tp->root);
164 	struct cls_fl_filter *f;
165 	struct fl_flow_mask *mask;
166 	struct fl_flow_key skb_key;
167 	struct fl_flow_key skb_mkey;
168 
169 	list_for_each_entry_rcu(mask, &head->masks, list) {
170 		fl_clear_masked_range(&skb_key, mask);
171 
172 		skb_key.indev_ifindex = skb->skb_iif;
173 		/* skb_flow_dissect() does not set n_proto in case an unknown
174 		 * protocol, so do it rather here.
175 		 */
176 		skb_key.basic.n_proto = skb->protocol;
177 		skb_flow_dissect_tunnel_info(skb, &mask->dissector, &skb_key);
178 		skb_flow_dissect(skb, &mask->dissector, &skb_key, 0);
179 
180 		fl_set_masked_key(&skb_mkey, &skb_key, mask);
181 
182 		f = fl_lookup(mask, &skb_mkey);
183 		if (f && !tc_skip_sw(f->flags)) {
184 			*res = f->res;
185 			return tcf_exts_exec(skb, &f->exts, res);
186 		}
187 	}
188 	return -1;
189 }
190 
191 static int fl_init(struct tcf_proto *tp)
192 {
193 	struct cls_fl_head *head;
194 
195 	head = kzalloc(sizeof(*head), GFP_KERNEL);
196 	if (!head)
197 		return -ENOBUFS;
198 
199 	INIT_LIST_HEAD_RCU(&head->masks);
200 	rcu_assign_pointer(tp->root, head);
201 	idr_init(&head->handle_idr);
202 
203 	return rhashtable_init(&head->ht, &mask_ht_params);
204 }
205 
206 static bool fl_mask_put(struct cls_fl_head *head, struct fl_flow_mask *mask,
207 			bool async)
208 {
209 	if (!list_empty(&mask->filters))
210 		return false;
211 
212 	rhashtable_remove_fast(&head->ht, &mask->ht_node, mask_ht_params);
213 	rhashtable_destroy(&mask->ht);
214 	list_del_rcu(&mask->list);
215 	if (async)
216 		kfree_rcu(mask, rcu);
217 	else
218 		kfree(mask);
219 
220 	return true;
221 }
222 
223 static void __fl_destroy_filter(struct cls_fl_filter *f)
224 {
225 	tcf_exts_destroy(&f->exts);
226 	tcf_exts_put_net(&f->exts);
227 	kfree(f);
228 }
229 
230 static void fl_destroy_filter_work(struct work_struct *work)
231 {
232 	struct cls_fl_filter *f = container_of(to_rcu_work(work),
233 					struct cls_fl_filter, rwork);
234 
235 	rtnl_lock();
236 	__fl_destroy_filter(f);
237 	rtnl_unlock();
238 }
239 
240 static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f,
241 				 struct netlink_ext_ack *extack)
242 {
243 	struct tc_cls_flower_offload cls_flower = {};
244 	struct tcf_block *block = tp->chain->block;
245 
246 	tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack);
247 	cls_flower.command = TC_CLSFLOWER_DESTROY;
248 	cls_flower.cookie = (unsigned long) f;
249 
250 	tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
251 			 &cls_flower, false);
252 	tcf_block_offload_dec(block, &f->flags);
253 }
254 
255 static int fl_hw_replace_filter(struct tcf_proto *tp,
256 				struct cls_fl_filter *f,
257 				struct netlink_ext_ack *extack)
258 {
259 	struct tc_cls_flower_offload cls_flower = {};
260 	struct tcf_block *block = tp->chain->block;
261 	bool skip_sw = tc_skip_sw(f->flags);
262 	int err;
263 
264 	tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack);
265 	cls_flower.command = TC_CLSFLOWER_REPLACE;
266 	cls_flower.cookie = (unsigned long) f;
267 	cls_flower.dissector = &f->mask->dissector;
268 	cls_flower.mask = &f->mask->key;
269 	cls_flower.key = &f->mkey;
270 	cls_flower.exts = &f->exts;
271 	cls_flower.classid = f->res.classid;
272 
273 	err = tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
274 			       &cls_flower, skip_sw);
275 	if (err < 0) {
276 		fl_hw_destroy_filter(tp, f, NULL);
277 		return err;
278 	} else if (err > 0) {
279 		tcf_block_offload_inc(block, &f->flags);
280 	}
281 
282 	if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW))
283 		return -EINVAL;
284 
285 	return 0;
286 }
287 
288 static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
289 {
290 	struct tc_cls_flower_offload cls_flower = {};
291 	struct tcf_block *block = tp->chain->block;
292 
293 	tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, NULL);
294 	cls_flower.command = TC_CLSFLOWER_STATS;
295 	cls_flower.cookie = (unsigned long) f;
296 	cls_flower.exts = &f->exts;
297 	cls_flower.classid = f->res.classid;
298 
299 	tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
300 			 &cls_flower, false);
301 }
302 
303 static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
304 			struct netlink_ext_ack *extack)
305 {
306 	struct cls_fl_head *head = rtnl_dereference(tp->root);
307 	bool async = tcf_exts_get_net(&f->exts);
308 	bool last;
309 
310 	idr_remove(&head->handle_idr, f->handle);
311 	list_del_rcu(&f->list);
312 	last = fl_mask_put(head, f->mask, async);
313 	if (!tc_skip_hw(f->flags))
314 		fl_hw_destroy_filter(tp, f, extack);
315 	tcf_unbind_filter(tp, &f->res);
316 	if (async)
317 		tcf_queue_work(&f->rwork, fl_destroy_filter_work);
318 	else
319 		__fl_destroy_filter(f);
320 
321 	return last;
322 }
323 
324 static void fl_destroy_sleepable(struct work_struct *work)
325 {
326 	struct cls_fl_head *head = container_of(to_rcu_work(work),
327 						struct cls_fl_head,
328 						rwork);
329 
330 	rhashtable_destroy(&head->ht);
331 	kfree(head);
332 	module_put(THIS_MODULE);
333 }
334 
335 static void fl_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
336 {
337 	struct cls_fl_head *head = rtnl_dereference(tp->root);
338 	struct fl_flow_mask *mask, *next_mask;
339 	struct cls_fl_filter *f, *next;
340 
341 	list_for_each_entry_safe(mask, next_mask, &head->masks, list) {
342 		list_for_each_entry_safe(f, next, &mask->filters, list) {
343 			if (__fl_delete(tp, f, extack))
344 				break;
345 		}
346 	}
347 	idr_destroy(&head->handle_idr);
348 
349 	__module_get(THIS_MODULE);
350 	tcf_queue_work(&head->rwork, fl_destroy_sleepable);
351 }
352 
353 static void *fl_get(struct tcf_proto *tp, u32 handle)
354 {
355 	struct cls_fl_head *head = rtnl_dereference(tp->root);
356 
357 	return idr_find(&head->handle_idr, handle);
358 }
359 
/*
 * Netlink attribute policy for the nested TCA_OPTIONS of a flower filter;
 * fl_change() hands it to nla_parse_nested().  Entries that only give .len
 * are the MAC and IPv6 address attributes.
 */
static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
	[TCA_FLOWER_UNSPEC]		= { .type = NLA_UNSPEC },
	[TCA_FLOWER_CLASSID]		= { .type = NLA_U32 },
	[TCA_FLOWER_INDEV]		= { .type = NLA_STRING,
					    .len = IFNAMSIZ },
	/* Ethernet header */
	[TCA_FLOWER_KEY_ETH_DST]	= { .len = ETH_ALEN },
	[TCA_FLOWER_KEY_ETH_DST_MASK]	= { .len = ETH_ALEN },
	[TCA_FLOWER_KEY_ETH_SRC]	= { .len = ETH_ALEN },
	[TCA_FLOWER_KEY_ETH_SRC_MASK]	= { .len = ETH_ALEN },
	[TCA_FLOWER_KEY_ETH_TYPE]	= { .type = NLA_U16 },
	/* IP protocol and addresses */
	[TCA_FLOWER_KEY_IP_PROTO]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_IPV4_SRC]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_IPV4_SRC_MASK]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_IPV4_DST]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_IPV4_DST_MASK]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_IPV6_SRC]	= { .len = sizeof(struct in6_addr) },
	[TCA_FLOWER_KEY_IPV6_SRC_MASK]	= { .len = sizeof(struct in6_addr) },
	[TCA_FLOWER_KEY_IPV6_DST]	= { .len = sizeof(struct in6_addr) },
	[TCA_FLOWER_KEY_IPV6_DST_MASK]	= { .len = sizeof(struct in6_addr) },
	/* Transport ports */
	[TCA_FLOWER_KEY_TCP_SRC]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_TCP_DST]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_UDP_SRC]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_UDP_DST]	= { .type = NLA_U16 },
	/* VLAN tag */
	[TCA_FLOWER_KEY_VLAN_ID]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_VLAN_PRIO]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_VLAN_ETH_TYPE]	= { .type = NLA_U16 },
	/* ENC_* attributes: key id and addresses */
	[TCA_FLOWER_KEY_ENC_KEY_ID]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_ENC_IPV4_SRC]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK] = { .type = NLA_U32 },
	[TCA_FLOWER_KEY_ENC_IPV4_DST]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_ENC_IPV4_DST_MASK] = { .type = NLA_U32 },
	[TCA_FLOWER_KEY_ENC_IPV6_SRC]	= { .len = sizeof(struct in6_addr) },
	[TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK] = { .len = sizeof(struct in6_addr) },
	[TCA_FLOWER_KEY_ENC_IPV6_DST]	= { .len = sizeof(struct in6_addr) },
	[TCA_FLOWER_KEY_ENC_IPV6_DST_MASK] = { .len = sizeof(struct in6_addr) },
	/* Transport port masks and SCTP ports */
	[TCA_FLOWER_KEY_TCP_SRC_MASK]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_TCP_DST_MASK]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_UDP_SRC_MASK]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_UDP_DST_MASK]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_SCTP_SRC_MASK]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_SCTP_DST_MASK]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_SCTP_SRC]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_SCTP_DST]	= { .type = NLA_U16 },
	/* ENC_* UDP ports */
	[TCA_FLOWER_KEY_ENC_UDP_SRC_PORT]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_ENC_UDP_DST_PORT]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK]	= { .type = NLA_U16 },
	/* Control flags; the mask is mandatory, see fl_set_key_flags() */
	[TCA_FLOWER_KEY_FLAGS]		= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_FLAGS_MASK]	= { .type = NLA_U32 },
	/* ICMP / ICMPv6 */
	[TCA_FLOWER_KEY_ICMPV4_TYPE]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ICMPV4_TYPE_MASK] = { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ICMPV4_CODE]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ICMPV4_CODE_MASK] = { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ICMPV6_TYPE]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ICMPV6_TYPE_MASK] = { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ICMPV6_CODE]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ICMPV6_CODE_MASK] = { .type = NLA_U8 },
	/* ARP / RARP */
	[TCA_FLOWER_KEY_ARP_SIP]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_ARP_SIP_MASK]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_ARP_TIP]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_ARP_TIP_MASK]	= { .type = NLA_U32 },
	[TCA_FLOWER_KEY_ARP_OP]		= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ARP_OP_MASK]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_ARP_SHA]	= { .len = ETH_ALEN },
	[TCA_FLOWER_KEY_ARP_SHA_MASK]	= { .len = ETH_ALEN },
	[TCA_FLOWER_KEY_ARP_THA]	= { .len = ETH_ALEN },
	[TCA_FLOWER_KEY_ARP_THA_MASK]	= { .len = ETH_ALEN },
	/* MPLS; range-checked in fl_set_key_mpls() */
	[TCA_FLOWER_KEY_MPLS_TTL]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_MPLS_BOS]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_MPLS_TC]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_MPLS_LABEL]	= { .type = NLA_U32 },
	/* TCP flags and IP tos/ttl */
	[TCA_FLOWER_KEY_TCP_FLAGS]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_TCP_FLAGS_MASK]	= { .type = NLA_U16 },
	[TCA_FLOWER_KEY_IP_TOS]		= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_IP_TOS_MASK]	= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_IP_TTL]		= { .type = NLA_U8 },
	[TCA_FLOWER_KEY_IP_TTL_MASK]	= { .type = NLA_U8 },
};
438 
439 static void fl_set_key_val(struct nlattr **tb,
440 			   void *val, int val_type,
441 			   void *mask, int mask_type, int len)
442 {
443 	if (!tb[val_type])
444 		return;
445 	memcpy(val, nla_data(tb[val_type]), len);
446 	if (mask_type == TCA_FLOWER_UNSPEC || !tb[mask_type])
447 		memset(mask, 0xff, len);
448 	else
449 		memcpy(mask, nla_data(tb[mask_type]), len);
450 }
451 
452 static int fl_set_key_mpls(struct nlattr **tb,
453 			   struct flow_dissector_key_mpls *key_val,
454 			   struct flow_dissector_key_mpls *key_mask)
455 {
456 	if (tb[TCA_FLOWER_KEY_MPLS_TTL]) {
457 		key_val->mpls_ttl = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TTL]);
458 		key_mask->mpls_ttl = MPLS_TTL_MASK;
459 	}
460 	if (tb[TCA_FLOWER_KEY_MPLS_BOS]) {
461 		u8 bos = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_BOS]);
462 
463 		if (bos & ~MPLS_BOS_MASK)
464 			return -EINVAL;
465 		key_val->mpls_bos = bos;
466 		key_mask->mpls_bos = MPLS_BOS_MASK;
467 	}
468 	if (tb[TCA_FLOWER_KEY_MPLS_TC]) {
469 		u8 tc = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TC]);
470 
471 		if (tc & ~MPLS_TC_MASK)
472 			return -EINVAL;
473 		key_val->mpls_tc = tc;
474 		key_mask->mpls_tc = MPLS_TC_MASK;
475 	}
476 	if (tb[TCA_FLOWER_KEY_MPLS_LABEL]) {
477 		u32 label = nla_get_u32(tb[TCA_FLOWER_KEY_MPLS_LABEL]);
478 
479 		if (label & ~MPLS_LABEL_MASK)
480 			return -EINVAL;
481 		key_val->mpls_label = label;
482 		key_mask->mpls_label = MPLS_LABEL_MASK;
483 	}
484 	return 0;
485 }
486 
487 static void fl_set_key_vlan(struct nlattr **tb,
488 			    struct flow_dissector_key_vlan *key_val,
489 			    struct flow_dissector_key_vlan *key_mask)
490 {
491 #define VLAN_PRIORITY_MASK	0x7
492 
493 	if (tb[TCA_FLOWER_KEY_VLAN_ID]) {
494 		key_val->vlan_id =
495 			nla_get_u16(tb[TCA_FLOWER_KEY_VLAN_ID]) & VLAN_VID_MASK;
496 		key_mask->vlan_id = VLAN_VID_MASK;
497 	}
498 	if (tb[TCA_FLOWER_KEY_VLAN_PRIO]) {
499 		key_val->vlan_priority =
500 			nla_get_u8(tb[TCA_FLOWER_KEY_VLAN_PRIO]) &
501 			VLAN_PRIORITY_MASK;
502 		key_mask->vlan_priority = VLAN_PRIORITY_MASK;
503 	}
504 }
505 
506 static void fl_set_key_flag(u32 flower_key, u32 flower_mask,
507 			    u32 *dissector_key, u32 *dissector_mask,
508 			    u32 flower_flag_bit, u32 dissector_flag_bit)
509 {
510 	if (flower_mask & flower_flag_bit) {
511 		*dissector_mask |= dissector_flag_bit;
512 		if (flower_key & flower_flag_bit)
513 			*dissector_key |= dissector_flag_bit;
514 	}
515 }
516 
517 static int fl_set_key_flags(struct nlattr **tb,
518 			    u32 *flags_key, u32 *flags_mask)
519 {
520 	u32 key, mask;
521 
522 	/* mask is mandatory for flags */
523 	if (!tb[TCA_FLOWER_KEY_FLAGS_MASK])
524 		return -EINVAL;
525 
526 	key = be32_to_cpu(nla_get_u32(tb[TCA_FLOWER_KEY_FLAGS]));
527 	mask = be32_to_cpu(nla_get_u32(tb[TCA_FLOWER_KEY_FLAGS_MASK]));
528 
529 	*flags_key  = 0;
530 	*flags_mask = 0;
531 
532 	fl_set_key_flag(key, mask, flags_key, flags_mask,
533 			TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT);
534 	fl_set_key_flag(key, mask, flags_key, flags_mask,
535 			TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST,
536 			FLOW_DIS_FIRST_FRAG);
537 
538 	return 0;
539 }
540 
541 static void fl_set_key_ip(struct nlattr **tb,
542 			  struct flow_dissector_key_ip *key,
543 			  struct flow_dissector_key_ip *mask)
544 {
545 		fl_set_key_val(tb, &key->tos, TCA_FLOWER_KEY_IP_TOS,
546 			       &mask->tos, TCA_FLOWER_KEY_IP_TOS_MASK,
547 			       sizeof(key->tos));
548 
549 		fl_set_key_val(tb, &key->ttl, TCA_FLOWER_KEY_IP_TTL,
550 			       &mask->ttl, TCA_FLOWER_KEY_IP_TTL_MASK,
551 			       sizeof(key->ttl));
552 }
553 
/*
 * Fill @key and @mask from the parsed netlink attributes in @tb.
 *
 * The statements are order dependent: the protocol fields written early
 * (basic.n_proto, basic.ip_proto) decide which of the later attribute
 * groups are looked at.  Returns 0 on success or a negative error from
 * tcf_change_indev(), fl_set_key_mpls() or fl_set_key_flags().
 */
static int fl_set_key(struct net *net, struct nlattr **tb,
		      struct fl_flow_key *key, struct fl_flow_key *mask,
		      struct netlink_ext_ack *extack)
{
	__be16 ethertype;
	int ret = 0;
#ifdef CONFIG_NET_CLS_IND
	/* Ingress device: a non-negative result is stored as the ifindex. */
	if (tb[TCA_FLOWER_INDEV]) {
		int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV], extack);
		if (err < 0)
			return err;
		key->indev_ifindex = err;
		mask->indev_ifindex = 0xffffffff;
	}
#endif

	fl_set_key_val(tb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
		       mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
		       sizeof(key->eth.dst));
	fl_set_key_val(tb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
		       mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
		       sizeof(key->eth.src));

	if (tb[TCA_FLOWER_KEY_ETH_TYPE]) {
		ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_ETH_TYPE]);

		/*
		 * For 802.1Q the VLAN fields are parsed and n_proto comes
		 * from TCA_FLOWER_KEY_VLAN_ETH_TYPE (exact match); otherwise
		 * the ethertype itself is matched exactly.
		 */
		if (ethertype == htons(ETH_P_8021Q)) {
			fl_set_key_vlan(tb, &key->vlan, &mask->vlan);
			fl_set_key_val(tb, &key->basic.n_proto,
				       TCA_FLOWER_KEY_VLAN_ETH_TYPE,
				       &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
				       sizeof(key->basic.n_proto));
		} else {
			key->basic.n_proto = ethertype;
			mask->basic.n_proto = cpu_to_be16(~0);
		}
	}

	/* ip_proto and tos/ttl are only read for IPv4/IPv6 filters. */
	if (key->basic.n_proto == htons(ETH_P_IP) ||
	    key->basic.n_proto == htons(ETH_P_IPV6)) {
		fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
			       &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
			       sizeof(key->basic.ip_proto));
		fl_set_key_ip(tb, &key->ip, &mask->ip);
	}

	/* IPv4 attributes take precedence over IPv6 ones (shared union). */
	if (tb[TCA_FLOWER_KEY_IPV4_SRC] || tb[TCA_FLOWER_KEY_IPV4_DST]) {
		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
		mask->control.addr_type = ~0;
		fl_set_key_val(tb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
			       &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
			       sizeof(key->ipv4.src));
		fl_set_key_val(tb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
			       &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
			       sizeof(key->ipv4.dst));
	} else if (tb[TCA_FLOWER_KEY_IPV6_SRC] || tb[TCA_FLOWER_KEY_IPV6_DST]) {
		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
		mask->control.addr_type = ~0;
		fl_set_key_val(tb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
			       &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
			       sizeof(key->ipv6.src));
		fl_set_key_val(tb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
			       &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
			       sizeof(key->ipv6.dst));
	}

	/*
	 * Protocol specific fields.  Exactly one branch is taken, selected
	 * by ip_proto first and by n_proto for ICMP, MPLS and ARP.
	 */
	if (key->basic.ip_proto == IPPROTO_TCP) {
		fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
			       &mask->tp.src, TCA_FLOWER_KEY_TCP_SRC_MASK,
			       sizeof(key->tp.src));
		fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
			       &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK,
			       sizeof(key->tp.dst));
		fl_set_key_val(tb, &key->tcp.flags, TCA_FLOWER_KEY_TCP_FLAGS,
			       &mask->tcp.flags, TCA_FLOWER_KEY_TCP_FLAGS_MASK,
			       sizeof(key->tcp.flags));
	} else if (key->basic.ip_proto == IPPROTO_UDP) {
		fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
			       &mask->tp.src, TCA_FLOWER_KEY_UDP_SRC_MASK,
			       sizeof(key->tp.src));
		fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
			       &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK,
			       sizeof(key->tp.dst));
	} else if (key->basic.ip_proto == IPPROTO_SCTP) {
		fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_SCTP_SRC,
			       &mask->tp.src, TCA_FLOWER_KEY_SCTP_SRC_MASK,
			       sizeof(key->tp.src));
		fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_SCTP_DST,
			       &mask->tp.dst, TCA_FLOWER_KEY_SCTP_DST_MASK,
			       sizeof(key->tp.dst));
	} else if (key->basic.n_proto == htons(ETH_P_IP) &&
		   key->basic.ip_proto == IPPROTO_ICMP) {
		fl_set_key_val(tb, &key->icmp.type, TCA_FLOWER_KEY_ICMPV4_TYPE,
			       &mask->icmp.type,
			       TCA_FLOWER_KEY_ICMPV4_TYPE_MASK,
			       sizeof(key->icmp.type));
		fl_set_key_val(tb, &key->icmp.code, TCA_FLOWER_KEY_ICMPV4_CODE,
			       &mask->icmp.code,
			       TCA_FLOWER_KEY_ICMPV4_CODE_MASK,
			       sizeof(key->icmp.code));
	} else if (key->basic.n_proto == htons(ETH_P_IPV6) &&
		   key->basic.ip_proto == IPPROTO_ICMPV6) {
		fl_set_key_val(tb, &key->icmp.type, TCA_FLOWER_KEY_ICMPV6_TYPE,
			       &mask->icmp.type,
			       TCA_FLOWER_KEY_ICMPV6_TYPE_MASK,
			       sizeof(key->icmp.type));
		fl_set_key_val(tb, &key->icmp.code, TCA_FLOWER_KEY_ICMPV6_CODE,
			       &mask->icmp.code,
			       TCA_FLOWER_KEY_ICMPV6_CODE_MASK,
			       sizeof(key->icmp.code));
	} else if (key->basic.n_proto == htons(ETH_P_MPLS_UC) ||
		   key->basic.n_proto == htons(ETH_P_MPLS_MC)) {
		ret = fl_set_key_mpls(tb, &key->mpls, &mask->mpls);
		if (ret)
			return ret;
	} else if (key->basic.n_proto == htons(ETH_P_ARP) ||
		   key->basic.n_proto == htons(ETH_P_RARP)) {
		fl_set_key_val(tb, &key->arp.sip, TCA_FLOWER_KEY_ARP_SIP,
			       &mask->arp.sip, TCA_FLOWER_KEY_ARP_SIP_MASK,
			       sizeof(key->arp.sip));
		fl_set_key_val(tb, &key->arp.tip, TCA_FLOWER_KEY_ARP_TIP,
			       &mask->arp.tip, TCA_FLOWER_KEY_ARP_TIP_MASK,
			       sizeof(key->arp.tip));
		fl_set_key_val(tb, &key->arp.op, TCA_FLOWER_KEY_ARP_OP,
			       &mask->arp.op, TCA_FLOWER_KEY_ARP_OP_MASK,
			       sizeof(key->arp.op));
		fl_set_key_val(tb, key->arp.sha, TCA_FLOWER_KEY_ARP_SHA,
			       mask->arp.sha, TCA_FLOWER_KEY_ARP_SHA_MASK,
			       sizeof(key->arp.sha));
		fl_set_key_val(tb, key->arp.tha, TCA_FLOWER_KEY_ARP_THA,
			       mask->arp.tha, TCA_FLOWER_KEY_ARP_THA_MASK,
			       sizeof(key->arp.tha));
	}

	/*
	 * ENC_* addresses.  The IPv4 and IPv6 blocks are independent ifs,
	 * so when both kinds of attribute are supplied the IPv6 block runs
	 * last and overwrites enc_control.addr_type.
	 */
	if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] ||
	    tb[TCA_FLOWER_KEY_ENC_IPV4_DST]) {
		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
		mask->enc_control.addr_type = ~0;
		fl_set_key_val(tb, &key->enc_ipv4.src,
			       TCA_FLOWER_KEY_ENC_IPV4_SRC,
			       &mask->enc_ipv4.src,
			       TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,
			       sizeof(key->enc_ipv4.src));
		fl_set_key_val(tb, &key->enc_ipv4.dst,
			       TCA_FLOWER_KEY_ENC_IPV4_DST,
			       &mask->enc_ipv4.dst,
			       TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,
			       sizeof(key->enc_ipv4.dst));
	}

	if (tb[TCA_FLOWER_KEY_ENC_IPV6_SRC] ||
	    tb[TCA_FLOWER_KEY_ENC_IPV6_DST]) {
		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
		mask->enc_control.addr_type = ~0;
		fl_set_key_val(tb, &key->enc_ipv6.src,
			       TCA_FLOWER_KEY_ENC_IPV6_SRC,
			       &mask->enc_ipv6.src,
			       TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,
			       sizeof(key->enc_ipv6.src));
		fl_set_key_val(tb, &key->enc_ipv6.dst,
			       TCA_FLOWER_KEY_ENC_IPV6_DST,
			       &mask->enc_ipv6.dst,
			       TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,
			       sizeof(key->enc_ipv6.dst));
	}

	/* ENC key id is always matched exactly (no mask attribute). */
	fl_set_key_val(tb, &key->enc_key_id.keyid, TCA_FLOWER_KEY_ENC_KEY_ID,
		       &mask->enc_key_id.keyid, TCA_FLOWER_UNSPEC,
		       sizeof(key->enc_key_id.keyid));

	fl_set_key_val(tb, &key->enc_tp.src, TCA_FLOWER_KEY_ENC_UDP_SRC_PORT,
		       &mask->enc_tp.src, TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK,
		       sizeof(key->enc_tp.src));

	fl_set_key_val(tb, &key->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT,
		       &mask->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,
		       sizeof(key->enc_tp.dst));

	/* Control flags last; their result is the function's return value. */
	if (tb[TCA_FLOWER_KEY_FLAGS])
		ret = fl_set_key_flags(tb, &key->control.flags, &mask->control.flags);

	return ret;
}
737 
738 static void fl_mask_copy(struct fl_flow_mask *dst,
739 			 struct fl_flow_mask *src)
740 {
741 	const void *psrc = fl_key_get_start(&src->key, src);
742 	void *pdst = fl_key_get_start(&dst->key, src);
743 
744 	memcpy(pdst, psrc, fl_mask_range(src));
745 	dst->range = src->range;
746 }
747 
748 static const struct rhashtable_params fl_ht_params = {
749 	.key_offset = offsetof(struct cls_fl_filter, mkey), /* base offset */
750 	.head_offset = offsetof(struct cls_fl_filter, ht_node),
751 	.automatic_shrinking = true,
752 };
753 
754 static int fl_init_mask_hashtable(struct fl_flow_mask *mask)
755 {
756 	mask->filter_ht_params = fl_ht_params;
757 	mask->filter_ht_params.key_len = fl_mask_range(mask);
758 	mask->filter_ht_params.key_offset += mask->range.start;
759 
760 	return rhashtable_init(&mask->ht, &mask->filter_ht_params);
761 }
762 
/* Byte offset and size of @member inside struct fl_flow_key. */
#define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member)
#define FL_KEY_MEMBER_SIZE(member) (sizeof(((struct fl_flow_key *) 0)->member))

/*
 * Evaluates to non-NULL when any byte of @member is non-zero in the
 * struct fl_flow_key that @mask points to.
 */
#define FL_KEY_IS_MASKED(mask, member)						\
	memchr_inv(((char *)(mask)) + FL_KEY_MEMBER_OFFSET(member),		\
		   0, FL_KEY_MEMBER_SIZE(member))

/*
 * Append a dissector key entry for @member to @keys and advance @cnt.
 *
 * The statement macros below deliberately do not end in a semicolon; the
 * caller supplies it, so they behave like a single statement and can be
 * used safely in if/else bodies.
 */
#define FL_KEY_SET(keys, cnt, id, member)					\
	do {									\
		(keys)[cnt].key_id = (id);					\
		(keys)[cnt].offset = FL_KEY_MEMBER_OFFSET(member);		\
		(cnt)++;							\
	} while (0)

/* Same as FL_KEY_SET(), but only when @member is used by @mask. */
#define FL_KEY_SET_IF_MASKED(mask, keys, cnt, id, member)			\
	do {									\
		if (FL_KEY_IS_MASKED(mask, member))				\
			FL_KEY_SET(keys, cnt, id, member);			\
	} while (0)
782 
783 static void fl_init_dissector(struct fl_flow_mask *mask)
784 {
785 	struct flow_dissector_key keys[FLOW_DISSECTOR_KEY_MAX];
786 	size_t cnt = 0;
787 
788 	FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_CONTROL, control);
789 	FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic);
790 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
791 			     FLOW_DISSECTOR_KEY_ETH_ADDRS, eth);
792 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
793 			     FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
794 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
795 			     FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
796 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
797 			     FLOW_DISSECTOR_KEY_PORTS, tp);
798 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
799 			     FLOW_DISSECTOR_KEY_IP, ip);
800 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
801 			     FLOW_DISSECTOR_KEY_TCP, tcp);
802 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
803 			     FLOW_DISSECTOR_KEY_ICMP, icmp);
804 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
805 			     FLOW_DISSECTOR_KEY_ARP, arp);
806 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
807 			     FLOW_DISSECTOR_KEY_MPLS, mpls);
808 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
809 			     FLOW_DISSECTOR_KEY_VLAN, vlan);
810 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
811 			     FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
812 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
813 			     FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, enc_ipv4);
814 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
815 			     FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, enc_ipv6);
816 	if (FL_KEY_IS_MASKED(&mask->key, enc_ipv4) ||
817 	    FL_KEY_IS_MASKED(&mask->key, enc_ipv6))
818 		FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_ENC_CONTROL,
819 			   enc_control);
820 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
821 			     FLOW_DISSECTOR_KEY_ENC_PORTS, enc_tp);
822 
823 	skb_flow_dissector_init(&mask->dissector, keys, cnt);
824 }
825 
826 static struct fl_flow_mask *fl_create_new_mask(struct cls_fl_head *head,
827 					       struct fl_flow_mask *mask)
828 {
829 	struct fl_flow_mask *newmask;
830 	int err;
831 
832 	newmask = kzalloc(sizeof(*newmask), GFP_KERNEL);
833 	if (!newmask)
834 		return ERR_PTR(-ENOMEM);
835 
836 	fl_mask_copy(newmask, mask);
837 
838 	err = fl_init_mask_hashtable(newmask);
839 	if (err)
840 		goto errout_free;
841 
842 	fl_init_dissector(newmask);
843 
844 	INIT_LIST_HEAD_RCU(&newmask->filters);
845 
846 	err = rhashtable_insert_fast(&head->ht, &newmask->ht_node,
847 				     mask_ht_params);
848 	if (err)
849 		goto errout_destroy;
850 
851 	list_add_tail_rcu(&newmask->list, &head->masks);
852 
853 	return newmask;
854 
855 errout_destroy:
856 	rhashtable_destroy(&newmask->ht);
857 errout_free:
858 	kfree(newmask);
859 
860 	return ERR_PTR(err);
861 }
862 
863 static int fl_check_assign_mask(struct cls_fl_head *head,
864 				struct cls_fl_filter *fnew,
865 				struct cls_fl_filter *fold,
866 				struct fl_flow_mask *mask)
867 {
868 	struct fl_flow_mask *newmask;
869 
870 	fnew->mask = rhashtable_lookup_fast(&head->ht, mask, mask_ht_params);
871 	if (!fnew->mask) {
872 		if (fold)
873 			return -EINVAL;
874 
875 		newmask = fl_create_new_mask(head, mask);
876 		if (IS_ERR(newmask))
877 			return PTR_ERR(newmask);
878 
879 		fnew->mask = newmask;
880 	} else if (fold && fold->mask != fnew->mask) {
881 		return -EINVAL;
882 	}
883 
884 	return 0;
885 }
886 
887 static int fl_set_parms(struct net *net, struct tcf_proto *tp,
888 			struct cls_fl_filter *f, struct fl_flow_mask *mask,
889 			unsigned long base, struct nlattr **tb,
890 			struct nlattr *est, bool ovr,
891 			struct netlink_ext_ack *extack)
892 {
893 	int err;
894 
895 	err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack);
896 	if (err < 0)
897 		return err;
898 
899 	if (tb[TCA_FLOWER_CLASSID]) {
900 		f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
901 		tcf_bind_filter(tp, &f->res, base);
902 	}
903 
904 	err = fl_set_key(net, tb, &f->key, &mask->key, extack);
905 	if (err)
906 		return err;
907 
908 	fl_mask_update_range(mask);
909 	fl_set_masked_key(&f->mkey, &f->key, mask);
910 
911 	return 0;
912 }
913 
914 static int fl_change(struct net *net, struct sk_buff *in_skb,
915 		     struct tcf_proto *tp, unsigned long base,
916 		     u32 handle, struct nlattr **tca,
917 		     void **arg, bool ovr, struct netlink_ext_ack *extack)
918 {
919 	struct cls_fl_head *head = rtnl_dereference(tp->root);
920 	struct cls_fl_filter *fold = *arg;
921 	struct cls_fl_filter *fnew;
922 	struct nlattr **tb;
923 	struct fl_flow_mask mask = {};
924 	int err;
925 
926 	if (!tca[TCA_OPTIONS])
927 		return -EINVAL;
928 
929 	tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL);
930 	if (!tb)
931 		return -ENOBUFS;
932 
933 	err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS],
934 			       fl_policy, NULL);
935 	if (err < 0)
936 		goto errout_tb;
937 
938 	if (fold && handle && fold->handle != handle) {
939 		err = -EINVAL;
940 		goto errout_tb;
941 	}
942 
943 	fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
944 	if (!fnew) {
945 		err = -ENOBUFS;
946 		goto errout_tb;
947 	}
948 
949 	err = tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);
950 	if (err < 0)
951 		goto errout;
952 
953 	if (!handle) {
954 		handle = 1;
955 		err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
956 				    INT_MAX, GFP_KERNEL);
957 	} else if (!fold) {
958 		/* user specifies a handle and it doesn't exist */
959 		err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
960 				    handle, GFP_KERNEL);
961 	}
962 	if (err)
963 		goto errout;
964 	fnew->handle = handle;
965 
966 	if (tb[TCA_FLOWER_FLAGS]) {
967 		fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]);
968 
969 		if (!tc_flags_valid(fnew->flags)) {
970 			err = -EINVAL;
971 			goto errout_idr;
972 		}
973 	}
974 
975 	err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr,
976 			   extack);
977 	if (err)
978 		goto errout_idr;
979 
980 	err = fl_check_assign_mask(head, fnew, fold, &mask);
981 	if (err)
982 		goto errout_idr;
983 
984 	if (!tc_skip_sw(fnew->flags)) {
985 		if (!fold && fl_lookup(fnew->mask, &fnew->mkey)) {
986 			err = -EEXIST;
987 			goto errout_mask;
988 		}
989 
990 		err = rhashtable_insert_fast(&fnew->mask->ht, &fnew->ht_node,
991 					     fnew->mask->filter_ht_params);
992 		if (err)
993 			goto errout_mask;
994 	}
995 
996 	if (!tc_skip_hw(fnew->flags)) {
997 		err = fl_hw_replace_filter(tp, fnew, extack);
998 		if (err)
999 			goto errout_mask;
1000 	}
1001 
1002 	if (!tc_in_hw(fnew->flags))
1003 		fnew->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
1004 
1005 	if (fold) {
1006 		if (!tc_skip_sw(fold->flags))
1007 			rhashtable_remove_fast(&fold->mask->ht,
1008 					       &fold->ht_node,
1009 					       fold->mask->filter_ht_params);
1010 		if (!tc_skip_hw(fold->flags))
1011 			fl_hw_destroy_filter(tp, fold, NULL);
1012 	}
1013 
1014 	*arg = fnew;
1015 
1016 	if (fold) {
1017 		idr_replace(&head->handle_idr, fnew, fnew->handle);
1018 		list_replace_rcu(&fold->list, &fnew->list);
1019 		tcf_unbind_filter(tp, &fold->res);
1020 		tcf_exts_get_net(&fold->exts);
1021 		tcf_queue_work(&fold->rwork, fl_destroy_filter_work);
1022 	} else {
1023 		list_add_tail_rcu(&fnew->list, &fnew->mask->filters);
1024 	}
1025 
1026 	kfree(tb);
1027 	return 0;
1028 
1029 errout_mask:
1030 	fl_mask_put(head, fnew->mask, false);
1031 
1032 errout_idr:
1033 	if (!fold)
1034 		idr_remove(&head->handle_idr, fnew->handle);
1035 errout:
1036 	tcf_exts_destroy(&fnew->exts);
1037 	kfree(fnew);
1038 errout_tb:
1039 	kfree(tb);
1040 	return err;
1041 }
1042 
1043 static int fl_delete(struct tcf_proto *tp, void *arg, bool *last,
1044 		     struct netlink_ext_ack *extack)
1045 {
1046 	struct cls_fl_head *head = rtnl_dereference(tp->root);
1047 	struct cls_fl_filter *f = arg;
1048 
1049 	if (!tc_skip_sw(f->flags))
1050 		rhashtable_remove_fast(&f->mask->ht, &f->ht_node,
1051 				       f->mask->filter_ht_params);
1052 	__fl_delete(tp, f, extack);
1053 	*last = list_empty(&head->masks);
1054 	return 0;
1055 }
1056 
1057 static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
1058 {
1059 	struct cls_fl_head *head = rtnl_dereference(tp->root);
1060 	struct cls_fl_filter *f;
1061 	struct fl_flow_mask *mask;
1062 
1063 	list_for_each_entry_rcu(mask, &head->masks, list) {
1064 		list_for_each_entry_rcu(f, &mask->filters, list) {
1065 			if (arg->count < arg->skip)
1066 				goto skip;
1067 			if (arg->fn(tp, f, arg) < 0) {
1068 				arg->stop = 1;
1069 				break;
1070 			}
1071 skip:
1072 			arg->count++;
1073 		}
1074 	}
1075 }
1076 
1077 static int fl_dump_key_val(struct sk_buff *skb,
1078 			   void *val, int val_type,
1079 			   void *mask, int mask_type, int len)
1080 {
1081 	int err;
1082 
1083 	if (!memchr_inv(mask, 0, len))
1084 		return 0;
1085 	err = nla_put(skb, val_type, len, val);
1086 	if (err)
1087 		return err;
1088 	if (mask_type != TCA_FLOWER_UNSPEC) {
1089 		err = nla_put(skb, mask_type, len, mask);
1090 		if (err)
1091 			return err;
1092 	}
1093 	return 0;
1094 }
1095 
1096 static int fl_dump_key_mpls(struct sk_buff *skb,
1097 			    struct flow_dissector_key_mpls *mpls_key,
1098 			    struct flow_dissector_key_mpls *mpls_mask)
1099 {
1100 	int err;
1101 
1102 	if (!memchr_inv(mpls_mask, 0, sizeof(*mpls_mask)))
1103 		return 0;
1104 	if (mpls_mask->mpls_ttl) {
1105 		err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_TTL,
1106 				 mpls_key->mpls_ttl);
1107 		if (err)
1108 			return err;
1109 	}
1110 	if (mpls_mask->mpls_tc) {
1111 		err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_TC,
1112 				 mpls_key->mpls_tc);
1113 		if (err)
1114 			return err;
1115 	}
1116 	if (mpls_mask->mpls_label) {
1117 		err = nla_put_u32(skb, TCA_FLOWER_KEY_MPLS_LABEL,
1118 				  mpls_key->mpls_label);
1119 		if (err)
1120 			return err;
1121 	}
1122 	if (mpls_mask->mpls_bos) {
1123 		err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_BOS,
1124 				 mpls_key->mpls_bos);
1125 		if (err)
1126 			return err;
1127 	}
1128 	return 0;
1129 }
1130 
1131 static int fl_dump_key_ip(struct sk_buff *skb,
1132 			  struct flow_dissector_key_ip *key,
1133 			  struct flow_dissector_key_ip *mask)
1134 {
1135 	if (fl_dump_key_val(skb, &key->tos, TCA_FLOWER_KEY_IP_TOS, &mask->tos,
1136 			    TCA_FLOWER_KEY_IP_TOS_MASK, sizeof(key->tos)) ||
1137 	    fl_dump_key_val(skb, &key->ttl, TCA_FLOWER_KEY_IP_TTL, &mask->ttl,
1138 			    TCA_FLOWER_KEY_IP_TTL_MASK, sizeof(key->ttl)))
1139 		return -1;
1140 
1141 	return 0;
1142 }
1143 
1144 static int fl_dump_key_vlan(struct sk_buff *skb,
1145 			    struct flow_dissector_key_vlan *vlan_key,
1146 			    struct flow_dissector_key_vlan *vlan_mask)
1147 {
1148 	int err;
1149 
1150 	if (!memchr_inv(vlan_mask, 0, sizeof(*vlan_mask)))
1151 		return 0;
1152 	if (vlan_mask->vlan_id) {
1153 		err = nla_put_u16(skb, TCA_FLOWER_KEY_VLAN_ID,
1154 				  vlan_key->vlan_id);
1155 		if (err)
1156 			return err;
1157 	}
1158 	if (vlan_mask->vlan_priority) {
1159 		err = nla_put_u8(skb, TCA_FLOWER_KEY_VLAN_PRIO,
1160 				 vlan_key->vlan_priority);
1161 		if (err)
1162 			return err;
1163 	}
1164 	return 0;
1165 }
1166 
1167 static void fl_get_key_flag(u32 dissector_key, u32 dissector_mask,
1168 			    u32 *flower_key, u32 *flower_mask,
1169 			    u32 flower_flag_bit, u32 dissector_flag_bit)
1170 {
1171 	if (dissector_mask & dissector_flag_bit) {
1172 		*flower_mask |= flower_flag_bit;
1173 		if (dissector_key & dissector_flag_bit)
1174 			*flower_key |= flower_flag_bit;
1175 	}
1176 }
1177 
1178 static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask)
1179 {
1180 	u32 key, mask;
1181 	__be32 _key, _mask;
1182 	int err;
1183 
1184 	if (!memchr_inv(&flags_mask, 0, sizeof(flags_mask)))
1185 		return 0;
1186 
1187 	key = 0;
1188 	mask = 0;
1189 
1190 	fl_get_key_flag(flags_key, flags_mask, &key, &mask,
1191 			TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT);
1192 	fl_get_key_flag(flags_key, flags_mask, &key, &mask,
1193 			TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST,
1194 			FLOW_DIS_FIRST_FRAG);
1195 
1196 	_key = cpu_to_be32(key);
1197 	_mask = cpu_to_be32(mask);
1198 
1199 	err = nla_put(skb, TCA_FLOWER_KEY_FLAGS, 4, &_key);
1200 	if (err)
1201 		return err;
1202 
1203 	return nla_put(skb, TCA_FLOWER_KEY_FLAGS_MASK, 4, &_mask);
1204 }
1205 
1206 static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
1207 		   struct sk_buff *skb, struct tcmsg *t)
1208 {
1209 	struct cls_fl_filter *f = fh;
1210 	struct nlattr *nest;
1211 	struct fl_flow_key *key, *mask;
1212 
1213 	if (!f)
1214 		return skb->len;
1215 
1216 	t->tcm_handle = f->handle;
1217 
1218 	nest = nla_nest_start(skb, TCA_OPTIONS);
1219 	if (!nest)
1220 		goto nla_put_failure;
1221 
1222 	if (f->res.classid &&
1223 	    nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid))
1224 		goto nla_put_failure;
1225 
1226 	key = &f->key;
1227 	mask = &f->mask->key;
1228 
1229 	if (mask->indev_ifindex) {
1230 		struct net_device *dev;
1231 
1232 		dev = __dev_get_by_index(net, key->indev_ifindex);
1233 		if (dev && nla_put_string(skb, TCA_FLOWER_INDEV, dev->name))
1234 			goto nla_put_failure;
1235 	}
1236 
1237 	if (!tc_skip_hw(f->flags))
1238 		fl_hw_update_stats(tp, f);
1239 
1240 	if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
1241 			    mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
1242 			    sizeof(key->eth.dst)) ||
1243 	    fl_dump_key_val(skb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
1244 			    mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
1245 			    sizeof(key->eth.src)) ||
1246 	    fl_dump_key_val(skb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE,
1247 			    &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
1248 			    sizeof(key->basic.n_proto)))
1249 		goto nla_put_failure;
1250 
1251 	if (fl_dump_key_mpls(skb, &key->mpls, &mask->mpls))
1252 		goto nla_put_failure;
1253 
1254 	if (fl_dump_key_vlan(skb, &key->vlan, &mask->vlan))
1255 		goto nla_put_failure;
1256 
1257 	if ((key->basic.n_proto == htons(ETH_P_IP) ||
1258 	     key->basic.n_proto == htons(ETH_P_IPV6)) &&
1259 	    (fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
1260 			    &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
1261 			    sizeof(key->basic.ip_proto)) ||
1262 	    fl_dump_key_ip(skb, &key->ip, &mask->ip)))
1263 		goto nla_put_failure;
1264 
1265 	if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
1266 	    (fl_dump_key_val(skb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
1267 			     &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
1268 			     sizeof(key->ipv4.src)) ||
1269 	     fl_dump_key_val(skb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
1270 			     &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
1271 			     sizeof(key->ipv4.dst))))
1272 		goto nla_put_failure;
1273 	else if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS &&
1274 		 (fl_dump_key_val(skb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
1275 				  &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
1276 				  sizeof(key->ipv6.src)) ||
1277 		  fl_dump_key_val(skb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
1278 				  &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
1279 				  sizeof(key->ipv6.dst))))
1280 		goto nla_put_failure;
1281 
1282 	if (key->basic.ip_proto == IPPROTO_TCP &&
1283 	    (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
1284 			     &mask->tp.src, TCA_FLOWER_KEY_TCP_SRC_MASK,
1285 			     sizeof(key->tp.src)) ||
1286 	     fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
1287 			     &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK,
1288 			     sizeof(key->tp.dst)) ||
1289 	     fl_dump_key_val(skb, &key->tcp.flags, TCA_FLOWER_KEY_TCP_FLAGS,
1290 			     &mask->tcp.flags, TCA_FLOWER_KEY_TCP_FLAGS_MASK,
1291 			     sizeof(key->tcp.flags))))
1292 		goto nla_put_failure;
1293 	else if (key->basic.ip_proto == IPPROTO_UDP &&
1294 		 (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
1295 				  &mask->tp.src, TCA_FLOWER_KEY_UDP_SRC_MASK,
1296 				  sizeof(key->tp.src)) ||
1297 		  fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
1298 				  &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK,
1299 				  sizeof(key->tp.dst))))
1300 		goto nla_put_failure;
1301 	else if (key->basic.ip_proto == IPPROTO_SCTP &&
1302 		 (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_SCTP_SRC,
1303 				  &mask->tp.src, TCA_FLOWER_KEY_SCTP_SRC_MASK,
1304 				  sizeof(key->tp.src)) ||
1305 		  fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_SCTP_DST,
1306 				  &mask->tp.dst, TCA_FLOWER_KEY_SCTP_DST_MASK,
1307 				  sizeof(key->tp.dst))))
1308 		goto nla_put_failure;
1309 	else if (key->basic.n_proto == htons(ETH_P_IP) &&
1310 		 key->basic.ip_proto == IPPROTO_ICMP &&
1311 		 (fl_dump_key_val(skb, &key->icmp.type,
1312 				  TCA_FLOWER_KEY_ICMPV4_TYPE, &mask->icmp.type,
1313 				  TCA_FLOWER_KEY_ICMPV4_TYPE_MASK,
1314 				  sizeof(key->icmp.type)) ||
1315 		  fl_dump_key_val(skb, &key->icmp.code,
1316 				  TCA_FLOWER_KEY_ICMPV4_CODE, &mask->icmp.code,
1317 				  TCA_FLOWER_KEY_ICMPV4_CODE_MASK,
1318 				  sizeof(key->icmp.code))))
1319 		goto nla_put_failure;
1320 	else if (key->basic.n_proto == htons(ETH_P_IPV6) &&
1321 		 key->basic.ip_proto == IPPROTO_ICMPV6 &&
1322 		 (fl_dump_key_val(skb, &key->icmp.type,
1323 				  TCA_FLOWER_KEY_ICMPV6_TYPE, &mask->icmp.type,
1324 				  TCA_FLOWER_KEY_ICMPV6_TYPE_MASK,
1325 				  sizeof(key->icmp.type)) ||
1326 		  fl_dump_key_val(skb, &key->icmp.code,
1327 				  TCA_FLOWER_KEY_ICMPV6_CODE, &mask->icmp.code,
1328 				  TCA_FLOWER_KEY_ICMPV6_CODE_MASK,
1329 				  sizeof(key->icmp.code))))
1330 		goto nla_put_failure;
1331 	else if ((key->basic.n_proto == htons(ETH_P_ARP) ||
1332 		  key->basic.n_proto == htons(ETH_P_RARP)) &&
1333 		 (fl_dump_key_val(skb, &key->arp.sip,
1334 				  TCA_FLOWER_KEY_ARP_SIP, &mask->arp.sip,
1335 				  TCA_FLOWER_KEY_ARP_SIP_MASK,
1336 				  sizeof(key->arp.sip)) ||
1337 		  fl_dump_key_val(skb, &key->arp.tip,
1338 				  TCA_FLOWER_KEY_ARP_TIP, &mask->arp.tip,
1339 				  TCA_FLOWER_KEY_ARP_TIP_MASK,
1340 				  sizeof(key->arp.tip)) ||
1341 		  fl_dump_key_val(skb, &key->arp.op,
1342 				  TCA_FLOWER_KEY_ARP_OP, &mask->arp.op,
1343 				  TCA_FLOWER_KEY_ARP_OP_MASK,
1344 				  sizeof(key->arp.op)) ||
1345 		  fl_dump_key_val(skb, key->arp.sha, TCA_FLOWER_KEY_ARP_SHA,
1346 				  mask->arp.sha, TCA_FLOWER_KEY_ARP_SHA_MASK,
1347 				  sizeof(key->arp.sha)) ||
1348 		  fl_dump_key_val(skb, key->arp.tha, TCA_FLOWER_KEY_ARP_THA,
1349 				  mask->arp.tha, TCA_FLOWER_KEY_ARP_THA_MASK,
1350 				  sizeof(key->arp.tha))))
1351 		goto nla_put_failure;
1352 
1353 	if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
1354 	    (fl_dump_key_val(skb, &key->enc_ipv4.src,
1355 			    TCA_FLOWER_KEY_ENC_IPV4_SRC, &mask->enc_ipv4.src,
1356 			    TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,
1357 			    sizeof(key->enc_ipv4.src)) ||
1358 	     fl_dump_key_val(skb, &key->enc_ipv4.dst,
1359 			     TCA_FLOWER_KEY_ENC_IPV4_DST, &mask->enc_ipv4.dst,
1360 			     TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,
1361 			     sizeof(key->enc_ipv4.dst))))
1362 		goto nla_put_failure;
1363 	else if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS &&
1364 		 (fl_dump_key_val(skb, &key->enc_ipv6.src,
1365 			    TCA_FLOWER_KEY_ENC_IPV6_SRC, &mask->enc_ipv6.src,
1366 			    TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,
1367 			    sizeof(key->enc_ipv6.src)) ||
1368 		 fl_dump_key_val(skb, &key->enc_ipv6.dst,
1369 				 TCA_FLOWER_KEY_ENC_IPV6_DST,
1370 				 &mask->enc_ipv6.dst,
1371 				 TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,
1372 			    sizeof(key->enc_ipv6.dst))))
1373 		goto nla_put_failure;
1374 
1375 	if (fl_dump_key_val(skb, &key->enc_key_id, TCA_FLOWER_KEY_ENC_KEY_ID,
1376 			    &mask->enc_key_id, TCA_FLOWER_UNSPEC,
1377 			    sizeof(key->enc_key_id)) ||
1378 	    fl_dump_key_val(skb, &key->enc_tp.src,
1379 			    TCA_FLOWER_KEY_ENC_UDP_SRC_PORT,
1380 			    &mask->enc_tp.src,
1381 			    TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK,
1382 			    sizeof(key->enc_tp.src)) ||
1383 	    fl_dump_key_val(skb, &key->enc_tp.dst,
1384 			    TCA_FLOWER_KEY_ENC_UDP_DST_PORT,
1385 			    &mask->enc_tp.dst,
1386 			    TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,
1387 			    sizeof(key->enc_tp.dst)))
1388 		goto nla_put_failure;
1389 
1390 	if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags))
1391 		goto nla_put_failure;
1392 
1393 	if (f->flags && nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags))
1394 		goto nla_put_failure;
1395 
1396 	if (tcf_exts_dump(skb, &f->exts))
1397 		goto nla_put_failure;
1398 
1399 	nla_nest_end(skb, nest);
1400 
1401 	if (tcf_exts_dump_stats(skb, &f->exts) < 0)
1402 		goto nla_put_failure;
1403 
1404 	return skb->len;
1405 
1406 nla_put_failure:
1407 	nla_nest_cancel(skb, nest);
1408 	return -1;
1409 }
1410 
1411 static void fl_bind_class(void *fh, u32 classid, unsigned long cl)
1412 {
1413 	struct cls_fl_filter *f = fh;
1414 
1415 	if (f && f->res.classid == classid)
1416 		f->res.class = cl;
1417 }
1418 
1419 static struct tcf_proto_ops cls_fl_ops __read_mostly = {
1420 	.kind		= "flower",
1421 	.classify	= fl_classify,
1422 	.init		= fl_init,
1423 	.destroy	= fl_destroy,
1424 	.get		= fl_get,
1425 	.change		= fl_change,
1426 	.delete		= fl_delete,
1427 	.walk		= fl_walk,
1428 	.dump		= fl_dump,
1429 	.bind_class	= fl_bind_class,
1430 	.owner		= THIS_MODULE,
1431 };
1432 
1433 static int __init cls_fl_init(void)
1434 {
1435 	return register_tcf_proto_ops(&cls_fl_ops);
1436 }
1437 
1438 static void __exit cls_fl_exit(void)
1439 {
1440 	unregister_tcf_proto_ops(&cls_fl_ops);
1441 }
1442 
1443 module_init(cls_fl_init);
1444 module_exit(cls_fl_exit);
1445 
1446 MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>");
1447 MODULE_DESCRIPTION("Flower classifier");
1448 MODULE_LICENSE("GPL v2");
1449