#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
#include <linux/tc_act/tc_csum.h>
#include <net/flow_offload.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_tuple.h>

static struct workqueue_struct *nf_flow_offload_wq;

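/* One unit of work queued on nf_flow_offload_wq: a single add, delete or
 * stats request for one flow entry against one flowtable.
 */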
struct flow_offload_work {
	struct list_head	list;
	enum flow_cls_command	cmd;
	int			priority;
	struct nf_flowtable	*flowtable;
	struct flow_offload	*flow;
	struct work_struct	work;
};

#define NF_FLOW_DISSECTOR(__match, __type, __field)	\
	(__match)->dissector.offset[__type] =		\
		offsetof(struct nf_flow_key, __field)

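/* Populate the tunnel portion of the match when the reverse direction
 * transmits over a lightweight tunnel (lwtunnel) route: tunnel id and outer
 * addresses, with source and destination swapped to match ingress packets.
 */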
static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
				   struct ip_tunnel_info *tun_info)
{
	struct nf_flow_key *mask = &match->mask;
	struct nf_flow_key *key = &match->key;
	unsigned int enc_keys;

	if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX))
		return;

	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
	key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id);
	mask->enc_key_id.keyid = 0xffffffff;
	enc_keys = BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
		   BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL);

	if (ip_tunnel_info_af(tun_info) == AF_INET) {
		NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
				  enc_ipv4);
		key->enc_ipv4.src = tun_info->key.u.ipv4.dst;
		key->enc_ipv4.dst = tun_info->key.u.ipv4.src;
		if (key->enc_ipv4.src)
			mask->enc_ipv4.src = 0xffffffff;
		if (key->enc_ipv4.dst)
			mask->enc_ipv4.dst = 0xffffffff;
		enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
	} else {
		NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
				  enc_ipv6);
		memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst,
		       sizeof(struct in6_addr));
		memcpy(&key->enc_ipv6.dst, &tun_info->key.u.ipv6.src,
		       sizeof(struct in6_addr));
		if (memcmp(&key->enc_ipv6.src, &in6addr_any,
			   sizeof(struct in6_addr)))
			memset(&mask->enc_ipv6.src, 0xff,
			       sizeof(struct in6_addr));
		if (memcmp(&key->enc_ipv6.dst, &in6addr_any,
			   sizeof(struct in6_addr)))
			memset(&mask->enc_ipv6.dst, 0xff,
			       sizeof(struct in6_addr));
		enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
	}

	match->dissector.used_keys |= enc_keys;
}

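/* Build the flow dissector match (key and mask) for one direction of the
 * flow: ingress ifindex, L3 addresses, L4 protocol and ports. Returns
 * -EOPNOTSUPP for address families or transport protocols that cannot be
 * expressed as a flower-style match.
 */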
static int nf_flow_rule_match(struct nf_flow_match *match,
			      const struct flow_offload_tuple *tuple,
			      struct dst_entry *other_dst)
{
	struct nf_flow_key *mask = &match->mask;
	struct nf_flow_key *key = &match->key;
	struct ip_tunnel_info *tun_info;

	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);

	if (other_dst && other_dst->lwtstate) {
		tun_info = lwt_tun_info(other_dst->lwtstate);
		nf_flow_rule_lwt_match(match, tun_info);
	}

	key->meta.ingress_ifindex = tuple->iifidx;
	mask->meta.ingress_ifindex = 0xffffffff;

	switch (tuple->l3proto) {
	case AF_INET:
		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
		key->basic.n_proto = htons(ETH_P_IP);
		key->ipv4.src = tuple->src_v4.s_addr;
		mask->ipv4.src = 0xffffffff;
		key->ipv4.dst = tuple->dst_v4.s_addr;
		mask->ipv4.dst = 0xffffffff;
		break;
	case AF_INET6:
		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
		key->basic.n_proto = htons(ETH_P_IPV6);
		key->ipv6.src = tuple->src_v6;
		memset(&mask->ipv6.src, 0xff, sizeof(mask->ipv6.src));
		key->ipv6.dst = tuple->dst_v6;
		memset(&mask->ipv6.dst, 0xff, sizeof(mask->ipv6.dst));
		break;
	default:
		return -EOPNOTSUPP;
	}
	mask->control.addr_type = 0xffff;
	match->dissector.used_keys |= BIT(key->control.addr_type);
	mask->basic.n_proto = 0xffff;

	switch (tuple->l4proto) {
	case IPPROTO_TCP:
		key->tcp.flags = 0;
		mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16);
		match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
		break;
	case IPPROTO_UDP:
		break;
	default:
		return -EOPNOTSUPP;
	}

	key->basic.ip_proto = tuple->l4proto;
	mask->basic.ip_proto = 0xff;

	key->tp.src = tuple->src_port;
	mask->tp.src = 0xffff;
	key->tp.dst = tuple->dst_port;
	mask->tp.dst = 0xffff;

	match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
				      BIT(FLOW_DISSECTOR_KEY_CONTROL) |
				      BIT(FLOW_DISSECTOR_KEY_BASIC) |
				      BIT(FLOW_DISSECTOR_KEY_PORTS);
	return 0;
}

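/* Fill in a FLOW_ACTION_MANGLE entry: the bits cleared in @mask within the
 * 32-bit word at @offset of the header selected by @htype are replaced by
 * @value, pedit-style.
 */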
static void flow_offload_mangle(struct flow_action_entry *entry,
				enum flow_action_mangle_base htype, u32 offset,
				const __be32 *value, const __be32 *mask)
{
	entry->id = FLOW_ACTION_MANGLE;
	entry->mangle.htype = htype;
	entry->mangle.offset = offset;
	memcpy(&entry->mangle.mask, mask, sizeof(u32));
	memcpy(&entry->mangle.val, value, sizeof(u32));
}

static inline struct flow_action_entry *
flow_action_entry_next(struct nf_flow_rule *flow_rule)
{
	int i = flow_rule->rule->action.num_entries++;

	return &flow_rule->rule->action.entries[i];
}

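/* Rewrite the Ethernet source address to the MAC of the device this
 * direction transmits through (the other direction's ingress device),
 * split across the 32-bit words at offsets 4 and 8 of the Ethernet header.
 */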
static int flow_offload_eth_src(struct net *net,
				const struct flow_offload *flow,
				enum flow_offload_tuple_dir dir,
				struct nf_flow_rule *flow_rule)
{
	const struct flow_offload_tuple *tuple = &flow->tuplehash[!dir].tuple;
	struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
	struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
	struct net_device *dev;
	u32 mask, val;
	u16 val16;

	dev = dev_get_by_index(net, tuple->iifidx);
	if (!dev)
		return -ENOENT;

	mask = ~0xffff0000;
	memcpy(&val16, dev->dev_addr, 2);
	val = val16 << 16;
	flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
			    &val, &mask);

	mask = ~0xffffffff;
	memcpy(&val, dev->dev_addr + 2, 4);
	flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
			    &val, &mask);
	dev_put(dev);

	return 0;
}

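/* Rewrite the Ethernet destination address to the next hop's hardware
 * address resolved from this direction's cached route. Fails with -ENOENT
 * if the neighbour entry is missing or not yet valid.
 */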
static int flow_offload_eth_dst(struct net *net,
				const struct flow_offload *flow,
				enum flow_offload_tuple_dir dir,
				struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
	struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
	const void *daddr = &flow->tuplehash[!dir].tuple.src_v4;
	const struct dst_entry *dst_cache;
	unsigned char ha[ETH_ALEN];
	struct neighbour *n;
	u32 mask, val;
	u8 nud_state;
	u16 val16;

	dst_cache = flow->tuplehash[dir].tuple.dst_cache;
	n = dst_neigh_lookup(dst_cache, daddr);
	if (!n)
		return -ENOENT;

	read_lock_bh(&n->lock);
	nud_state = n->nud_state;
	ether_addr_copy(ha, n->ha);
	read_unlock_bh(&n->lock);

	if (!(nud_state & NUD_VALID)) {
		neigh_release(n);
		return -ENOENT;
	}

	mask = ~0xffffffff;
	memcpy(&val, ha, 4);
	flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0,
			    &val, &mask);

	mask = ~0x0000ffff;
	memcpy(&val16, ha + 4, 2);
	val = val16;
	flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
			    &val, &mask);
	neigh_release(n);

	return 0;
}

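/* IPv4 NAT mangles: for SNAT, original-direction packets get their source
 * address rewritten to the translated address (the reply tuple's
 * destination) and reply packets get their destination restored; DNAT is
 * the mirror image.
 */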
static void flow_offload_ipv4_snat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask = ~htonl(0xffffffff);
	__be32 addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
		offset = offsetof(struct iphdr, saddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
		offset = offsetof(struct iphdr, daddr);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
			    &addr, &mask);
}

static void flow_offload_ipv4_dnat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask = ~htonl(0xffffffff);
	__be32 addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
		offset = offsetof(struct iphdr, daddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
		offset = offsetof(struct iphdr, saddr);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
			    &addr, &mask);
}

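/* An IPv6 address does not fit in a single 32-bit mangle, so emit one
 * mangle action per 32-bit word of the address.
 */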
static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
				     unsigned int offset,
				     const __be32 *addr, const __be32 *mask)
{
	struct flow_action_entry *entry;
	int i;

	for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i++) {
		entry = flow_action_entry_next(flow_rule);
		flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
				    offset + i * sizeof(u32), &addr[i], mask);
	}
}

static void flow_offload_ipv6_snat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	u32 mask = ~htonl(0xffffffff);
	const __be32 *addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, saddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, daddr);
		break;
	default:
		return;
	}

	flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
}

static void flow_offload_ipv6_dnat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	u32 mask = ~htonl(0xffffffff);
	const __be32 *addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, daddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, saddr);
		break;
	default:
		return;
	}

	flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
}

static int flow_offload_l4proto(const struct flow_offload *flow)
{
	u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
	u8 type = 0;

	switch (protonum) {
	case IPPROTO_TCP:
		type = FLOW_ACT_MANGLE_HDR_TYPE_TCP;
		break;
	case IPPROTO_UDP:
		type = FLOW_ACT_MANGLE_HDR_TYPE_UDP;
		break;
	default:
		break;
	}

	return type;
}

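/* Port NAT: source and destination ports share the first 32-bit word of
 * the TCP/UDP header, so the new port is shifted into the matching half of
 * that word and the other half is preserved through the mask.
 */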
static void flow_offload_port_snat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask, port;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port);
		offset = 0; /* offsetof(struct tcphdr, source); */
		port = htonl(port << 16);
		mask = ~htonl(0xffff0000);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port);
		offset = 0; /* offsetof(struct tcphdr, dest); */
		port = htonl(port);
		mask = ~htonl(0xffff);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
			    &port, &mask);
}

static void flow_offload_port_dnat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask, port;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port);
		offset = 0; /* offsetof(struct tcphdr, dest); */
		port = htonl(port);
		mask = ~htonl(0xffff);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port);
		offset = 0; /* offsetof(struct tcphdr, source); */
		port = htonl(port << 16);
		mask = ~htonl(0xffff0000);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
			    &port, &mask);
}

static void flow_offload_ipv4_checksum(struct net *net,
				       const struct flow_offload *flow,
				       struct nf_flow_rule *flow_rule)
{
	u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);

	entry->id = FLOW_ACTION_CSUM;
	entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR;

	switch (protonum) {
	case IPPROTO_TCP:
		entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_TCP;
		break;
	case IPPROTO_UDP:
		entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
		break;
	}
}

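/* Final action: redirect the packet to the output device taken from this
 * direction's cached route. The device reference is released when the rule
 * is destroyed in __nf_flow_offload_destroy().
 */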
static void flow_offload_redirect(const struct flow_offload *flow,
				  enum flow_offload_tuple_dir dir,
				  struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	struct rtable *rt;

	rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
	entry->id = FLOW_ACTION_REDIRECT;
	entry->dev = rt->dst.dev;
	dev_hold(rt->dst.dev);
}

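/* If this direction's route transmits over a tunnel, request hardware
 * encapsulation; if the other direction's does, the incoming packet must be
 * decapsulated first.
 */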
static void flow_offload_encap_tunnel(const struct flow_offload *flow,
				      enum flow_offload_tuple_dir dir,
				      struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry;
	struct dst_entry *dst;

	dst = flow->tuplehash[dir].tuple.dst_cache;
	if (dst && dst->lwtstate) {
		struct ip_tunnel_info *tun_info;

		tun_info = lwt_tun_info(dst->lwtstate);
		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
			entry = flow_action_entry_next(flow_rule);
			entry->id = FLOW_ACTION_TUNNEL_ENCAP;
			entry->tunnel = tun_info;
		}
	}
}

static void flow_offload_decap_tunnel(const struct flow_offload *flow,
				      enum flow_offload_tuple_dir dir,
				      struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry;
	struct dst_entry *dst;

	dst = flow->tuplehash[!dir].tuple.dst_cache;
	if (dst && dst->lwtstate) {
		struct ip_tunnel_info *tun_info;

		tun_info = lwt_tun_info(dst->lwtstate);
		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
			entry = flow_action_entry_next(flow_rule);
			entry->id = FLOW_ACTION_TUNNEL_DECAP;
		}
	}
}

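/* Translate one direction of an IPv4 flow into a flow_action list:
 * optional decap/encap, Ethernet rewrites, NAT mangles, checksum fixup and
 * the final redirect. Exported so flowtable types can use it as their
 * ->action callback; the IPv6 variant below mirrors it without the
 * checksum action.
 */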
int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
			    enum flow_offload_tuple_dir dir,
			    struct nf_flow_rule *flow_rule)
{
	flow_offload_decap_tunnel(flow, dir, flow_rule);
	flow_offload_encap_tunnel(flow, dir, flow_rule);

	if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
	    flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
		return -1;

	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
		flow_offload_ipv4_snat(net, flow, dir, flow_rule);
		flow_offload_port_snat(net, flow, dir, flow_rule);
	}
	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
		flow_offload_ipv4_dnat(net, flow, dir, flow_rule);
		flow_offload_port_dnat(net, flow, dir, flow_rule);
	}
	if (test_bit(NF_FLOW_SNAT, &flow->flags) ||
	    test_bit(NF_FLOW_DNAT, &flow->flags))
		flow_offload_ipv4_checksum(net, flow, flow_rule);

	flow_offload_redirect(flow, dir, flow_rule);

	return 0;
}
EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);

int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
			    enum flow_offload_tuple_dir dir,
			    struct nf_flow_rule *flow_rule)
{
	flow_offload_decap_tunnel(flow, dir, flow_rule);
	flow_offload_encap_tunnel(flow, dir, flow_rule);

	if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
	    flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
		return -1;

	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
		flow_offload_ipv6_snat(net, flow, dir, flow_rule);
		flow_offload_port_snat(net, flow, dir, flow_rule);
	}
	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
		flow_offload_ipv6_dnat(net, flow, dir, flow_rule);
		flow_offload_port_dnat(net, flow, dir, flow_rule);
	}

	flow_offload_redirect(flow, dir, flow_rule);

	return 0;
}
EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6);

#define NF_FLOW_RULE_ACTION_MAX	16

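/* Build the match and action list for one direction of a flow; on error the
 * partially built rule is freed and NULL is returned.
 */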
static struct nf_flow_rule *
nf_flow_offload_rule_alloc(struct net *net,
			   const struct flow_offload_work *offload,
			   enum flow_offload_tuple_dir dir)
{
	const struct nf_flowtable *flowtable = offload->flowtable;
	const struct flow_offload *flow = offload->flow;
	const struct flow_offload_tuple *tuple;
	struct nf_flow_rule *flow_rule;
	struct dst_entry *other_dst;
	int err = -ENOMEM;

	flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
	if (!flow_rule)
		goto err_flow;

	flow_rule->rule = flow_rule_alloc(NF_FLOW_RULE_ACTION_MAX);
	if (!flow_rule->rule)
		goto err_flow_rule;

	flow_rule->rule->match.dissector = &flow_rule->match.dissector;
	flow_rule->rule->match.mask = &flow_rule->match.mask;
	flow_rule->rule->match.key = &flow_rule->match.key;

	tuple = &flow->tuplehash[dir].tuple;
	other_dst = flow->tuplehash[!dir].tuple.dst_cache;
	err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst);
	if (err < 0)
		goto err_flow_match;

	flow_rule->rule->action.num_entries = 0;
	if (flowtable->type->action(net, flow, dir, flow_rule) < 0)
		goto err_flow_match;

	return flow_rule;

err_flow_match:
	kfree(flow_rule->rule);
err_flow_rule:
	kfree(flow_rule);
err_flow:
	return NULL;
}

static void __nf_flow_offload_destroy(struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry;
	int i;

	for (i = 0; i < flow_rule->rule->action.num_entries; i++) {
		entry = &flow_rule->rule->action.entries[i];
		if (entry->id != FLOW_ACTION_REDIRECT)
			continue;

		dev_put(entry->dev);
	}
	kfree(flow_rule->rule);
	kfree(flow_rule);
}

static void nf_flow_offload_destroy(struct nf_flow_rule *flow_rule[])
{
	int i;

	for (i = 0; i < FLOW_OFFLOAD_DIR_MAX; i++)
		__nf_flow_offload_destroy(flow_rule[i]);
}

static int nf_flow_offload_alloc(const struct flow_offload_work *offload,
				 struct nf_flow_rule *flow_rule[])
{
	struct net *net = read_pnet(&offload->flowtable->net);

	flow_rule[0] = nf_flow_offload_rule_alloc(net, offload,
						  FLOW_OFFLOAD_DIR_ORIGINAL);
	if (!flow_rule[0])
		return -ENOMEM;

	flow_rule[1] = nf_flow_offload_rule_alloc(net, offload,
						  FLOW_OFFLOAD_DIR_REPLY);
	if (!flow_rule[1]) {
		__nf_flow_offload_destroy(flow_rule[0]);
		return -ENOMEM;
	}

	return 0;
}

static void nf_flow_offload_init(struct flow_cls_offload *cls_flow,
				 __be16 proto, int priority,
				 enum flow_cls_command cmd,
				 const struct flow_offload_tuple *tuple,
				 struct netlink_ext_ack *extack)
{
	cls_flow->common.protocol = proto;
	cls_flow->common.prio = priority;
	cls_flow->common.extack = extack;
	cls_flow->command = cmd;
	cls_flow->cookie = (unsigned long)tuple;
}

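/* Push one classifier command (replace, destroy or stats) for one direction
 * of the flow to every callback registered on the flowtable block. Returns
 * the number of callbacks that accepted the command.
 */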
static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
				 struct flow_offload *flow,
				 struct nf_flow_rule *flow_rule,
				 enum flow_offload_tuple_dir dir,
				 int priority, int cmd,
				 struct flow_stats *stats,
				 struct list_head *block_cb_list)
{
	struct flow_cls_offload cls_flow = {};
	struct flow_block_cb *block_cb;
	struct netlink_ext_ack extack;
	__be16 proto = ETH_P_ALL;
	int err, i = 0;

	nf_flow_offload_init(&cls_flow, proto, priority, cmd,
			     &flow->tuplehash[dir].tuple, &extack);
	if (cmd == FLOW_CLS_REPLACE)
		cls_flow.rule = flow_rule->rule;

	down_read(&flowtable->flow_block_lock);
	list_for_each_entry(block_cb, block_cb_list, list) {
		err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow,
				   block_cb->cb_priv);
		if (err < 0)
			continue;

		i++;
	}
	up_read(&flowtable->flow_block_lock);

	if (cmd == FLOW_CLS_STATS)
		memcpy(stats, &cls_flow.stats, sizeof(*stats));

	return i;
}

static int flow_offload_tuple_add(struct flow_offload_work *offload,
				  struct nf_flow_rule *flow_rule,
				  enum flow_offload_tuple_dir dir)
{
	return nf_flow_offload_tuple(offload->flowtable, offload->flow,
				     flow_rule, dir, offload->priority,
				     FLOW_CLS_REPLACE, NULL,
				     &offload->flowtable->flow_block.cb_list);
}

static void flow_offload_tuple_del(struct flow_offload_work *offload,
				   enum flow_offload_tuple_dir dir)
{
	nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
			      offload->priority, FLOW_CLS_DESTROY, NULL,
			      &offload->flowtable->flow_block.cb_list);
}

static int flow_offload_rule_add(struct flow_offload_work *offload,
				 struct nf_flow_rule *flow_rule[])
{
	int ok_count = 0;

	ok_count += flow_offload_tuple_add(offload, flow_rule[0],
					   FLOW_OFFLOAD_DIR_ORIGINAL);
	ok_count += flow_offload_tuple_add(offload, flow_rule[1],
					   FLOW_OFFLOAD_DIR_REPLY);
	if (ok_count == 0)
		return -ENOENT;

	return 0;
}

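/* Workqueue handlers: install both directions of the rule, tear them down,
 * or pull hardware counters and refresh the flow timeout and conntrack
 * accounting.
 */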
static void flow_offload_work_add(struct flow_offload_work *offload)
{
	struct nf_flow_rule *flow_rule[FLOW_OFFLOAD_DIR_MAX];
	int err;

	err = nf_flow_offload_alloc(offload, flow_rule);
	if (err < 0)
		return;

	err = flow_offload_rule_add(offload, flow_rule);
	if (err < 0)
		set_bit(NF_FLOW_HW_REFRESH, &offload->flow->flags);
	else
		set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);

	nf_flow_offload_destroy(flow_rule);
}

static void flow_offload_work_del(struct flow_offload_work *offload)
{
	clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
	flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
	flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
	set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
}

static void flow_offload_tuple_stats(struct flow_offload_work *offload,
				     enum flow_offload_tuple_dir dir,
				     struct flow_stats *stats)
{
	nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
			      offload->priority, FLOW_CLS_STATS, stats,
			      &offload->flowtable->flow_block.cb_list);
}

static void flow_offload_work_stats(struct flow_offload_work *offload)
{
	struct flow_stats stats[FLOW_OFFLOAD_DIR_MAX] = {};
	u64 lastused;

	flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
	flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]);

	lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
	offload->flow->timeout = max_t(u64, offload->flow->timeout,
				       lastused + NF_FLOW_TIMEOUT);

	if (offload->flowtable->flags & NF_FLOWTABLE_COUNTER) {
		if (stats[0].pkts)
			nf_ct_acct_add(offload->flow->ct,
				       FLOW_OFFLOAD_DIR_ORIGINAL,
				       stats[0].pkts, stats[0].bytes);
		if (stats[1].pkts)
			nf_ct_acct_add(offload->flow->ct,
				       FLOW_OFFLOAD_DIR_REPLY,
				       stats[1].pkts, stats[1].bytes);
	}
}

static void flow_offload_work_handler(struct work_struct *work)
{
	struct flow_offload_work *offload;

	offload = container_of(work, struct flow_offload_work, work);
	switch (offload->cmd) {
	case FLOW_CLS_REPLACE:
		flow_offload_work_add(offload);
		break;
	case FLOW_CLS_DESTROY:
		flow_offload_work_del(offload);
		break;
	case FLOW_CLS_STATS:
		flow_offload_work_stats(offload);
		break;
	default:
		WARN_ON_ONCE(1);
	}

	clear_bit(NF_FLOW_HW_PENDING, &offload->flow->flags);
	kfree(offload);
}

static void flow_offload_queue_work(struct flow_offload_work *offload)
{
	queue_work(nf_flow_offload_wq, &offload->work);
}

static struct flow_offload_work *
nf_flow_offload_work_alloc(struct nf_flowtable *flowtable,
			   struct flow_offload *flow, unsigned int cmd)
{
	struct flow_offload_work *offload;

	if (test_and_set_bit(NF_FLOW_HW_PENDING, &flow->flags))
		return NULL;

	offload = kmalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
	if (!offload) {
		clear_bit(NF_FLOW_HW_PENDING, &flow->flags);
		return NULL;
	}

	offload->cmd = cmd;
	offload->flow = flow;
	offload->priority = flowtable->priority;
	offload->flowtable = flowtable;
	INIT_WORK(&offload->work, flow_offload_work_handler);

	return offload;
}

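/* Entry points called from the flowtable core: schedule asynchronous
 * hardware add, delete and stats requests for a flow. A stats request is
 * only queued once the flow is close enough to expiry to need a refresh.
 */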
void nf_flow_offload_add(struct nf_flowtable *flowtable,
			 struct flow_offload *flow)
{
	struct flow_offload_work *offload;

	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_REPLACE);
	if (!offload)
		return;

	flow_offload_queue_work(offload);
}

void nf_flow_offload_del(struct nf_flowtable *flowtable,
			 struct flow_offload *flow)
{
	struct flow_offload_work *offload;

	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_DESTROY);
	if (!offload)
		return;

	set_bit(NF_FLOW_HW_DYING, &flow->flags);
	flow_offload_queue_work(offload);
}

void nf_flow_offload_stats(struct nf_flowtable *flowtable,
			   struct flow_offload *flow)
{
	struct flow_offload_work *offload;
	__s32 delta;

	delta = nf_flow_timeout_delta(flow->timeout);
	if (delta >= (9 * NF_FLOW_TIMEOUT) / 10)
		return;

	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_STATS);
	if (!offload)
		return;

	flow_offload_queue_work(offload);
}

void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
{
	if (nf_flowtable_hw_offload(flowtable))
		flush_workqueue(nf_flow_offload_wq);
}

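/* Attach or detach the driver callbacks collected in @bo on the flowtable's
 * flow_block, depending on the bind/unbind command.
 */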
static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
				     struct flow_block_offload *bo,
				     enum flow_block_command cmd)
{
	struct flow_block_cb *block_cb, *next;
	int err = 0;

	switch (cmd) {
	case FLOW_BLOCK_BIND:
		list_splice(&bo->cb_list, &flowtable->flow_block.cb_list);
		break;
	case FLOW_BLOCK_UNBIND:
		list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
			list_del(&block_cb->list);
			flow_block_cb_free(block_cb);
		}
		break;
	default:
		WARN_ON_ONCE(1);
		err = -EOPNOTSUPP;
	}

	return err;
}

static void nf_flow_table_block_offload_init(struct flow_block_offload *bo,
					     struct net *net,
					     enum flow_block_command cmd,
					     struct nf_flowtable *flowtable,
					     struct netlink_ext_ack *extack)
{
	memset(bo, 0, sizeof(*bo));
	bo->net		= net;
	bo->block	= &flowtable->flow_block;
	bo->command	= cmd;
	bo->binder_type	= FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
	bo->extack	= extack;
	INIT_LIST_HEAD(&bo->cb_list);
}

static void nf_flow_table_indr_cleanup(struct flow_block_cb *block_cb)
{
	struct nf_flowtable *flowtable = block_cb->indr.data;
	struct net_device *dev = block_cb->indr.dev;

	nf_flow_table_gc_cleanup(flowtable, dev);
	down_write(&flowtable->flow_block_lock);
	list_del(&block_cb->list);
	list_del(&block_cb->driver_list);
	flow_block_cb_free(block_cb);
	up_write(&flowtable->flow_block_lock);
}

static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo,
					  struct nf_flowtable *flowtable,
					  struct net_device *dev,
					  enum flow_block_command cmd,
					  struct netlink_ext_ack *extack)
{
	nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
					 extack);

	return flow_indr_dev_setup_offload(dev, NULL, TC_SETUP_FT, flowtable, bo,
					   nf_flow_table_indr_cleanup);
}

static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
				     struct nf_flowtable *flowtable,
				     struct net_device *dev,
				     enum flow_block_command cmd,
				     struct netlink_ext_ack *extack)
{
	int err;

	nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
					 extack);
	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo);
	if (err < 0)
		return err;

	return 0;
}

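/* Bind or unbind a flowtable to a device's hardware offload path, going
 * through ndo_setup_tc when the driver provides it and through the indirect
 * block infrastructure (e.g. for tunnel devices) otherwise.
 */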
int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
				struct net_device *dev,
				enum flow_block_command cmd)
{
	struct netlink_ext_ack extack = {};
	struct flow_block_offload bo;
	int err;

	if (!nf_flowtable_hw_offload(flowtable))
		return 0;

	if (dev->netdev_ops->ndo_setup_tc)
		err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd,
						&extack);
	else
		err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd,
						     &extack);
	if (err < 0)
		return err;

	return nf_flow_table_block_setup(flowtable, &bo, cmd);
}
EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);

int nf_flow_table_offload_init(void)
{
	nf_flow_offload_wq = alloc_workqueue("nf_flow_table_offload",
					     WQ_UNBOUND, 0);
	if (!nf_flow_offload_wq)
		return -ENOMEM;

	return 0;
}

void nf_flow_table_offload_exit(void)
{
	destroy_workqueue(nf_flow_offload_wq);
}