xref: /openbmc/linux/net/openvswitch/actions.c (revision 911b8eac)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2007-2017 Nicira, Inc.
4  */
5 
6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7 
8 #include <linux/skbuff.h>
9 #include <linux/in.h>
10 #include <linux/ip.h>
11 #include <linux/openvswitch.h>
12 #include <linux/sctp.h>
13 #include <linux/tcp.h>
14 #include <linux/udp.h>
15 #include <linux/in6.h>
16 #include <linux/if_arp.h>
17 #include <linux/if_vlan.h>
18 
19 #include <net/dst.h>
20 #include <net/ip.h>
21 #include <net/ipv6.h>
22 #include <net/ip6_fib.h>
23 #include <net/checksum.h>
24 #include <net/dsfield.h>
25 #include <net/mpls.h>
26 #include <net/sctp/checksum.h>
27 
28 #include "datapath.h"
29 #include "flow.h"
30 #include "conntrack.h"
31 #include "vport.h"
32 #include "flow_netlink.h"
33 
34 struct deferred_action {
35 	struct sk_buff *skb;
36 	const struct nlattr *actions;
37 	int actions_len;
38 
39 	/* Store pkt_key clone when creating deferred action. */
40 	struct sw_flow_key pkt_key;
41 };
42 
43 #define MAX_L2_LEN	(VLAN_ETH_HLEN + 3 * MPLS_HLEN)
44 struct ovs_frag_data {
45 	unsigned long dst;
46 	struct vport *vport;
47 	struct ovs_skb_cb cb;
48 	__be16 inner_protocol;
49 	u16 network_offset;	/* valid only for MPLS */
50 	u16 vlan_tci;
51 	__be16 vlan_proto;
52 	unsigned int l2_len;
53 	u8 mac_proto;
54 	u8 l2_data[MAX_L2_LEN];
55 };
56 
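/* Per-CPU scratch area used to carry the saved L2 header and OVS metadata
 * from prepare_frag() to ovs_vport_output() while the IP stack emits
 * fragments.
 */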
57 static DEFINE_PER_CPU(struct ovs_frag_data, ovs_frag_data_storage);
58 
59 #define DEFERRED_ACTION_FIFO_SIZE 10
60 #define OVS_RECURSION_LIMIT 5
61 #define OVS_DEFERRED_ACTION_THRESHOLD (OVS_RECURSION_LIMIT - 2)
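/* Deeper recursion levels get no per-level key from 'flow_keys' (clone_key()
 * returns NULL there), so clone_execute() falls back to deferring the work
 * on the per-CPU action FIFO below.
 */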
62 struct action_fifo {
63 	int head;
64 	int tail;
65 	/* Deferred action fifo queue storage. */
66 	struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];
67 };
68 
69 struct action_flow_keys {
70 	struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD];
71 };
72 
73 static struct action_fifo __percpu *action_fifos;
74 static struct action_flow_keys __percpu *flow_keys;
75 static DEFINE_PER_CPU(int, exec_actions_level);
76 
77 /* Make a clone of the 'key', using the pre-allocated percpu 'flow_keys'
78  * space. Return NULL if no key space is left.
79  */
80 static struct sw_flow_key *clone_key(const struct sw_flow_key *key_)
81 {
82 	struct action_flow_keys *keys = this_cpu_ptr(flow_keys);
83 	int level = this_cpu_read(exec_actions_level);
84 	struct sw_flow_key *key = NULL;
85 
86 	if (level <= OVS_DEFERRED_ACTION_THRESHOLD) {
87 		key = &keys->key[level - 1];
88 		*key = *key_;
89 	}
90 
91 	return key;
92 }
93 
94 static void action_fifo_init(struct action_fifo *fifo)
95 {
96 	fifo->head = 0;
97 	fifo->tail = 0;
98 }
99 
100 static bool action_fifo_is_empty(const struct action_fifo *fifo)
101 {
102 	return (fifo->head == fifo->tail);
103 }
104 
105 static struct deferred_action *action_fifo_get(struct action_fifo *fifo)
106 {
107 	if (action_fifo_is_empty(fifo))
108 		return NULL;
109 
110 	return &fifo->fifo[fifo->tail++];
111 }
112 
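/* Reserve the next slot in the fifo, or return NULL when it is full. */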
113 static struct deferred_action *action_fifo_put(struct action_fifo *fifo)
114 {
115 	if (fifo->head >= DEFERRED_ACTION_FIFO_SIZE - 1)
116 		return NULL;
117 
118 	return &fifo->fifo[fifo->head++];
119 }
120 
121 /* Add a deferred action, or return NULL if the fifo is full. */
122 static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
123 				    const struct sw_flow_key *key,
124 				    const struct nlattr *actions,
125 				    const int actions_len)
126 {
127 	struct action_fifo *fifo;
128 	struct deferred_action *da;
129 
130 	fifo = this_cpu_ptr(action_fifos);
131 	da = action_fifo_put(fifo);
132 	if (da) {
133 		da->skb = skb;
134 		da->actions = actions;
135 		da->actions_len = actions_len;
136 		da->pkt_key = *key;
137 	}
138 
139 	return da;
140 }
141 
142 static void invalidate_flow_key(struct sw_flow_key *key)
143 {
144 	key->mac_proto |= SW_FLOW_KEY_INVALID;
145 }
146 
147 static bool is_flow_key_valid(const struct sw_flow_key *key)
148 {
149 	return !(key->mac_proto & SW_FLOW_KEY_INVALID);
150 }
151 
152 static int clone_execute(struct datapath *dp, struct sk_buff *skb,
153 			 struct sw_flow_key *key,
154 			 u32 recirc_id,
155 			 const struct nlattr *actions, int len,
156 			 bool last, bool clone_flow_key);
157 
158 static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
159 			      struct sw_flow_key *key,
160 			      const struct nlattr *attr, int len);
161 
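/* Push an MPLS label stack entry onto 'skb'.  A zero 'mac_len' means the
 * packet carries no L2 header, so the new MPLS header becomes the outermost
 * one.
 */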
162 static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
163 		     __be32 mpls_lse, __be16 mpls_ethertype, __u16 mac_len)
164 {
165 	int err;
166 
167 	err = skb_mpls_push(skb, mpls_lse, mpls_ethertype, mac_len, !!mac_len);
168 	if (err)
169 		return err;
170 
171 	if (!mac_len)
172 		key->mac_proto = MAC_PROTO_NONE;
173 
174 	invalidate_flow_key(key);
175 	return 0;
176 }
177 
178 static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
179 		    const __be16 ethertype)
180 {
181 	int err;
182 
183 	err = skb_mpls_pop(skb, ethertype, skb->mac_len,
184 			   ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET);
185 	if (err)
186 		return err;
187 
188 	if (ethertype == htons(ETH_P_TEB))
189 		key->mac_proto = MAC_PROTO_ETHERNET;
190 
191 	invalidate_flow_key(key);
192 	return 0;
193 }
194 
195 static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
196 		    const __be32 *mpls_lse, const __be32 *mask)
197 {
198 	struct mpls_shim_hdr *stack;
199 	__be32 lse;
200 	int err;
201 
202 	stack = mpls_hdr(skb);
203 	lse = OVS_MASKED(stack->label_stack_entry, *mpls_lse, *mask);
204 	err = skb_mpls_update_lse(skb, lse);
205 	if (err)
206 		return err;
207 
208 	flow_key->mpls.lse[0] = lse;
209 	return 0;
210 }
211 
212 static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key)
213 {
214 	int err;
215 
216 	err = skb_vlan_pop(skb);
217 	if (skb_vlan_tag_present(skb)) {
218 		invalidate_flow_key(key);
219 	} else {
220 		key->eth.vlan.tci = 0;
221 		key->eth.vlan.tpid = 0;
222 	}
223 	return err;
224 }
225 
226 static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
227 		     const struct ovs_action_push_vlan *vlan)
228 {
229 	if (skb_vlan_tag_present(skb)) {
230 		invalidate_flow_key(key);
231 	} else {
232 		key->eth.vlan.tci = vlan->vlan_tci;
233 		key->eth.vlan.tpid = vlan->vlan_tpid;
234 	}
235 	return skb_vlan_push(skb, vlan->vlan_tpid,
236 			     ntohs(vlan->vlan_tci) & ~VLAN_CFI_MASK);
237 }
238 
239 /* 'src' is already properly masked. */
240 static void ether_addr_copy_masked(u8 *dst_, const u8 *src_, const u8 *mask_)
241 {
242 	u16 *dst = (u16 *)dst_;
243 	const u16 *src = (const u16 *)src_;
244 	const u16 *mask = (const u16 *)mask_;
245 
246 	OVS_SET_MASKED(dst[0], src[0], mask[0]);
247 	OVS_SET_MASKED(dst[1], src[1], mask[1]);
248 	OVS_SET_MASKED(dst[2], src[2], mask[2]);
249 }
250 
251 static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key,
252 			const struct ovs_key_ethernet *key,
253 			const struct ovs_key_ethernet *mask)
254 {
255 	int err;
256 
257 	err = skb_ensure_writable(skb, ETH_HLEN);
258 	if (unlikely(err))
259 		return err;
260 
261 	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
262 
263 	ether_addr_copy_masked(eth_hdr(skb)->h_source, key->eth_src,
264 			       mask->eth_src);
265 	ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst,
266 			       mask->eth_dst);
267 
268 	skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
269 
270 	ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source);
271 	ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest);
272 	return 0;
273 }
274 
275 /* pop_eth does not support VLAN packets as this action is never called
276  * for them.
277  */
278 static int pop_eth(struct sk_buff *skb, struct sw_flow_key *key)
279 {
280 	skb_pull_rcsum(skb, ETH_HLEN);
281 	skb_reset_mac_header(skb);
282 	skb_reset_mac_len(skb);
283 
284 	/* safe right before invalidate_flow_key */
285 	key->mac_proto = MAC_PROTO_NONE;
286 	invalidate_flow_key(key);
287 	return 0;
288 }
289 
290 static int push_eth(struct sk_buff *skb, struct sw_flow_key *key,
291 		    const struct ovs_action_push_eth *ethh)
292 {
293 	struct ethhdr *hdr;
294 
295 	/* Add the new Ethernet header */
296 	if (skb_cow_head(skb, ETH_HLEN) < 0)
297 		return -ENOMEM;
298 
299 	skb_push(skb, ETH_HLEN);
300 	skb_reset_mac_header(skb);
301 	skb_reset_mac_len(skb);
302 
303 	hdr = eth_hdr(skb);
304 	ether_addr_copy(hdr->h_source, ethh->addresses.eth_src);
305 	ether_addr_copy(hdr->h_dest, ethh->addresses.eth_dst);
306 	hdr->h_proto = skb->protocol;
307 
308 	skb_postpush_rcsum(skb, hdr, ETH_HLEN);
309 
310 	/* safe right before invalidate_flow_key */
311 	key->mac_proto = MAC_PROTO_ETHERNET;
312 	invalidate_flow_key(key);
313 	return 0;
314 }
315 
316 static int push_nsh(struct sk_buff *skb, struct sw_flow_key *key,
317 		    const struct nshhdr *nh)
318 {
319 	int err;
320 
321 	err = nsh_push(skb, nh);
322 	if (err)
323 		return err;
324 
325 	/* safe right before invalidate_flow_key */
326 	key->mac_proto = MAC_PROTO_NONE;
327 	invalidate_flow_key(key);
328 	return 0;
329 }
330 
331 static int pop_nsh(struct sk_buff *skb, struct sw_flow_key *key)
332 {
333 	int err;
334 
335 	err = nsh_pop(skb);
336 	if (err)
337 		return err;
338 
339 	/* safe right before invalidate_flow_key */
340 	if (skb->protocol == htons(ETH_P_TEB))
341 		key->mac_proto = MAC_PROTO_ETHERNET;
342 	else
343 		key->mac_proto = MAC_PROTO_NONE;
344 	invalidate_flow_key(key);
345 	return 0;
346 }
347 
348 static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh,
349 				  __be32 addr, __be32 new_addr)
350 {
351 	int transport_len = skb->len - skb_transport_offset(skb);
352 
353 	if (nh->frag_off & htons(IP_OFFSET))
354 		return;
355 
356 	if (nh->protocol == IPPROTO_TCP) {
357 		if (likely(transport_len >= sizeof(struct tcphdr)))
358 			inet_proto_csum_replace4(&tcp_hdr(skb)->check, skb,
359 						 addr, new_addr, true);
360 	} else if (nh->protocol == IPPROTO_UDP) {
361 		if (likely(transport_len >= sizeof(struct udphdr))) {
362 			struct udphdr *uh = udp_hdr(skb);
363 
364 			if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
365 				inet_proto_csum_replace4(&uh->check, skb,
366 							 addr, new_addr, true);
367 				if (!uh->check)
368 					uh->check = CSUM_MANGLED_0;
369 			}
370 		}
371 	}
372 }
373 
374 static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
375 			__be32 *addr, __be32 new_addr)
376 {
377 	update_ip_l4_checksum(skb, nh, *addr, new_addr);
378 	csum_replace4(&nh->check, *addr, new_addr);
379 	skb_clear_hash(skb);
380 	*addr = new_addr;
381 }
382 
383 static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto,
384 				 __be32 addr[4], const __be32 new_addr[4])
385 {
386 	int transport_len = skb->len - skb_transport_offset(skb);
387 
388 	if (l4_proto == NEXTHDR_TCP) {
389 		if (likely(transport_len >= sizeof(struct tcphdr)))
390 			inet_proto_csum_replace16(&tcp_hdr(skb)->check, skb,
391 						  addr, new_addr, true);
392 	} else if (l4_proto == NEXTHDR_UDP) {
393 		if (likely(transport_len >= sizeof(struct udphdr))) {
394 			struct udphdr *uh = udp_hdr(skb);
395 
396 			if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
397 				inet_proto_csum_replace16(&uh->check, skb,
398 							  addr, new_addr, true);
399 				if (!uh->check)
400 					uh->check = CSUM_MANGLED_0;
401 			}
402 		}
403 	} else if (l4_proto == NEXTHDR_ICMP) {
404 		if (likely(transport_len >= sizeof(struct icmp6hdr)))
405 			inet_proto_csum_replace16(&icmp6_hdr(skb)->icmp6_cksum,
406 						  skb, addr, new_addr, true);
407 	}
408 }
409 
410 static void mask_ipv6_addr(const __be32 old[4], const __be32 addr[4],
411 			   const __be32 mask[4], __be32 masked[4])
412 {
413 	masked[0] = OVS_MASKED(old[0], addr[0], mask[0]);
414 	masked[1] = OVS_MASKED(old[1], addr[1], mask[1]);
415 	masked[2] = OVS_MASKED(old[2], addr[2], mask[2]);
416 	masked[3] = OVS_MASKED(old[3], addr[3], mask[3]);
417 }
418 
419 static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
420 			  __be32 addr[4], const __be32 new_addr[4],
421 			  bool recalculate_csum)
422 {
423 	if (recalculate_csum)
424 		update_ipv6_checksum(skb, l4_proto, addr, new_addr);
425 
426 	skb_clear_hash(skb);
427 	memcpy(addr, new_addr, sizeof(__be32[4]));
428 }
429 
430 static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask)
431 {
432 	/* Bits 21-24 are always unmasked, so this retains their values. */
433 	OVS_SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16));
434 	OVS_SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8));
435 	OVS_SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask);
436 }
437 
438 static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl,
439 		       u8 mask)
440 {
441 	new_ttl = OVS_MASKED(nh->ttl, new_ttl, mask);
442 
443 	csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8));
444 	nh->ttl = new_ttl;
445 }
446 
447 static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *flow_key,
448 		    const struct ovs_key_ipv4 *key,
449 		    const struct ovs_key_ipv4 *mask)
450 {
451 	struct iphdr *nh;
452 	__be32 new_addr;
453 	int err;
454 
455 	err = skb_ensure_writable(skb, skb_network_offset(skb) +
456 				  sizeof(struct iphdr));
457 	if (unlikely(err))
458 		return err;
459 
460 	nh = ip_hdr(skb);
461 
462 	/* Setting IP addresses is typically only a side effect of
463 	 * matching on them in the current userspace implementation, so it
464 	 * makes sense to check whether the value actually changed.
465 	 */
466 	if (mask->ipv4_src) {
467 		new_addr = OVS_MASKED(nh->saddr, key->ipv4_src, mask->ipv4_src);
468 
469 		if (unlikely(new_addr != nh->saddr)) {
470 			set_ip_addr(skb, nh, &nh->saddr, new_addr);
471 			flow_key->ipv4.addr.src = new_addr;
472 		}
473 	}
474 	if (mask->ipv4_dst) {
475 		new_addr = OVS_MASKED(nh->daddr, key->ipv4_dst, mask->ipv4_dst);
476 
477 		if (unlikely(new_addr != nh->daddr)) {
478 			set_ip_addr(skb, nh, &nh->daddr, new_addr);
479 			flow_key->ipv4.addr.dst = new_addr;
480 		}
481 	}
482 	if (mask->ipv4_tos) {
483 		ipv4_change_dsfield(nh, ~mask->ipv4_tos, key->ipv4_tos);
484 		flow_key->ip.tos = nh->tos;
485 	}
486 	if (mask->ipv4_ttl) {
487 		set_ip_ttl(skb, nh, key->ipv4_ttl, mask->ipv4_ttl);
488 		flow_key->ip.ttl = nh->ttl;
489 	}
490 
491 	return 0;
492 }
493 
494 static bool is_ipv6_mask_nonzero(const __be32 addr[4])
495 {
496 	return !!(addr[0] | addr[1] | addr[2] | addr[3]);
497 }
498 
499 static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
500 		    const struct ovs_key_ipv6 *key,
501 		    const struct ovs_key_ipv6 *mask)
502 {
503 	struct ipv6hdr *nh;
504 	int err;
505 
506 	err = skb_ensure_writable(skb, skb_network_offset(skb) +
507 				  sizeof(struct ipv6hdr));
508 	if (unlikely(err))
509 		return err;
510 
511 	nh = ipv6_hdr(skb);
512 
513 	/* Setting IP addresses is typically only a side effect of
514 	 * matching on them in the current userspace implementation, so it
515 	 * makes sense to check whether the value actually changed.
516 	 */
517 	if (is_ipv6_mask_nonzero(mask->ipv6_src)) {
518 		__be32 *saddr = (__be32 *)&nh->saddr;
519 		__be32 masked[4];
520 
521 		mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked);
522 
523 		if (unlikely(memcmp(saddr, masked, sizeof(masked)))) {
524 			set_ipv6_addr(skb, flow_key->ip.proto, saddr, masked,
525 				      true);
526 			memcpy(&flow_key->ipv6.addr.src, masked,
527 			       sizeof(flow_key->ipv6.addr.src));
528 		}
529 	}
530 	if (is_ipv6_mask_nonzero(mask->ipv6_dst)) {
531 		unsigned int offset = 0;
532 		int flags = IP6_FH_F_SKIP_RH;
533 		bool recalc_csum = true;
534 		__be32 *daddr = (__be32 *)&nh->daddr;
535 		__be32 masked[4];
536 
537 		mask_ipv6_addr(daddr, key->ipv6_dst, mask->ipv6_dst, masked);
538 
539 		if (unlikely(memcmp(daddr, masked, sizeof(masked)))) {
540 			if (ipv6_ext_hdr(nh->nexthdr))
541 				recalc_csum = (ipv6_find_hdr(skb, &offset,
542 							     NEXTHDR_ROUTING,
543 							     NULL, &flags)
544 					       != NEXTHDR_ROUTING);
545 
546 			set_ipv6_addr(skb, flow_key->ip.proto, daddr, masked,
547 				      recalc_csum);
548 			memcpy(&flow_key->ipv6.addr.dst, masked,
549 			       sizeof(flow_key->ipv6.addr.dst));
550 		}
551 	}
552 	if (mask->ipv6_tclass) {
553 		ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass);
554 		flow_key->ip.tos = ipv6_get_dsfield(nh);
555 	}
556 	if (mask->ipv6_label) {
557 		set_ipv6_fl(nh, ntohl(key->ipv6_label),
558 			    ntohl(mask->ipv6_label));
559 		flow_key->ipv6.label =
560 		    *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
561 	}
562 	if (mask->ipv6_hlimit) {
563 		OVS_SET_MASKED(nh->hop_limit, key->ipv6_hlimit,
564 			       mask->ipv6_hlimit);
565 		flow_key->ip.ttl = nh->hop_limit;
566 	}
567 	return 0;
568 }
569 
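/* Apply a masked set to the NSH base header and, for MD type 1, the fixed
 * context headers, keeping 'flow_key' in sync with the packet.
 */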
570 static int set_nsh(struct sk_buff *skb, struct sw_flow_key *flow_key,
571 		   const struct nlattr *a)
572 {
573 	struct nshhdr *nh;
574 	size_t length;
575 	int err;
576 	u8 flags;
577 	u8 ttl;
578 	int i;
579 
580 	struct ovs_key_nsh key;
581 	struct ovs_key_nsh mask;
582 
583 	err = nsh_key_from_nlattr(a, &key, &mask);
584 	if (err)
585 		return err;
586 
587 	/* Make sure the NSH base header is there */
588 	if (!pskb_may_pull(skb, skb_network_offset(skb) + NSH_BASE_HDR_LEN))
589 		return -ENOMEM;
590 
591 	nh = nsh_hdr(skb);
592 	length = nsh_hdr_len(nh);
593 
594 	/* Make sure the whole NSH header is there */
595 	err = skb_ensure_writable(skb, skb_network_offset(skb) +
596 				       length);
597 	if (unlikely(err))
598 		return err;
599 
600 	nh = nsh_hdr(skb);
601 	skb_postpull_rcsum(skb, nh, length);
602 	flags = nsh_get_flags(nh);
603 	flags = OVS_MASKED(flags, key.base.flags, mask.base.flags);
604 	flow_key->nsh.base.flags = flags;
605 	ttl = nsh_get_ttl(nh);
606 	ttl = OVS_MASKED(ttl, key.base.ttl, mask.base.ttl);
607 	flow_key->nsh.base.ttl = ttl;
608 	nsh_set_flags_and_ttl(nh, flags, ttl);
609 	nh->path_hdr = OVS_MASKED(nh->path_hdr, key.base.path_hdr,
610 				  mask.base.path_hdr);
611 	flow_key->nsh.base.path_hdr = nh->path_hdr;
612 	switch (nh->mdtype) {
613 	case NSH_M_TYPE1:
614 		for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++) {
615 			nh->md1.context[i] =
616 			    OVS_MASKED(nh->md1.context[i], key.context[i],
617 				       mask.context[i]);
618 		}
619 		memcpy(flow_key->nsh.context, nh->md1.context,
620 		       sizeof(nh->md1.context));
621 		break;
622 	case NSH_M_TYPE2:
623 		memset(flow_key->nsh.context, 0,
624 		       sizeof(flow_key->nsh.context));
625 		break;
626 	default:
627 		return -EINVAL;
628 	}
629 	skb_postpush_rcsum(skb, nh, length);
630 	return 0;
631 }
632 
633 /* Must follow skb_ensure_writable() since that can move the skb data. */
634 static void set_tp_port(struct sk_buff *skb, __be16 *port,
635 			__be16 new_port, __sum16 *check)
636 {
637 	inet_proto_csum_replace2(check, skb, *port, new_port, false);
638 	*port = new_port;
639 }
640 
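/* Apply a masked set to the UDP ports.  The checksum is adjusted only when
 * the datagram actually carries one and the skb is not CHECKSUM_PARTIAL.
 */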
641 static int set_udp(struct sk_buff *skb, struct sw_flow_key *flow_key,
642 		   const struct ovs_key_udp *key,
643 		   const struct ovs_key_udp *mask)
644 {
645 	struct udphdr *uh;
646 	__be16 src, dst;
647 	int err;
648 
649 	err = skb_ensure_writable(skb, skb_transport_offset(skb) +
650 				  sizeof(struct udphdr));
651 	if (unlikely(err))
652 		return err;
653 
654 	uh = udp_hdr(skb);
655 	/* At least one mask is non-zero, so do not bother checking them. */
656 	src = OVS_MASKED(uh->source, key->udp_src, mask->udp_src);
657 	dst = OVS_MASKED(uh->dest, key->udp_dst, mask->udp_dst);
658 
659 	if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) {
660 		if (likely(src != uh->source)) {
661 			set_tp_port(skb, &uh->source, src, &uh->check);
662 			flow_key->tp.src = src;
663 		}
664 		if (likely(dst != uh->dest)) {
665 			set_tp_port(skb, &uh->dest, dst, &uh->check);
666 			flow_key->tp.dst = dst;
667 		}
668 
669 		if (unlikely(!uh->check))
670 			uh->check = CSUM_MANGLED_0;
671 	} else {
672 		uh->source = src;
673 		uh->dest = dst;
674 		flow_key->tp.src = src;
675 		flow_key->tp.dst = dst;
676 	}
677 
678 	skb_clear_hash(skb);
679 
680 	return 0;
681 }
682 
683 static int set_tcp(struct sk_buff *skb, struct sw_flow_key *flow_key,
684 		   const struct ovs_key_tcp *key,
685 		   const struct ovs_key_tcp *mask)
686 {
687 	struct tcphdr *th;
688 	__be16 src, dst;
689 	int err;
690 
691 	err = skb_ensure_writable(skb, skb_transport_offset(skb) +
692 				  sizeof(struct tcphdr));
693 	if (unlikely(err))
694 		return err;
695 
696 	th = tcp_hdr(skb);
697 	src = OVS_MASKED(th->source, key->tcp_src, mask->tcp_src);
698 	if (likely(src != th->source)) {
699 		set_tp_port(skb, &th->source, src, &th->check);
700 		flow_key->tp.src = src;
701 	}
702 	dst = OVS_MASKED(th->dest, key->tcp_dst, mask->tcp_dst);
703 	if (likely(dst != th->dest)) {
704 		set_tp_port(skb, &th->dest, dst, &th->check);
705 		flow_key->tp.dst = dst;
706 	}
707 	skb_clear_hash(skb);
708 
709 	return 0;
710 }
711 
712 static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
713 		    const struct ovs_key_sctp *key,
714 		    const struct ovs_key_sctp *mask)
715 {
716 	unsigned int sctphoff = skb_transport_offset(skb);
717 	struct sctphdr *sh;
718 	__le32 old_correct_csum, new_csum, old_csum;
719 	int err;
720 
721 	err = skb_ensure_writable(skb, sctphoff + sizeof(struct sctphdr));
722 	if (unlikely(err))
723 		return err;
724 
725 	sh = sctp_hdr(skb);
726 	old_csum = sh->checksum;
727 	old_correct_csum = sctp_compute_cksum(skb, sctphoff);
728 
729 	sh->source = OVS_MASKED(sh->source, key->sctp_src, mask->sctp_src);
730 	sh->dest = OVS_MASKED(sh->dest, key->sctp_dst, mask->sctp_dst);
731 
732 	new_csum = sctp_compute_cksum(skb, sctphoff);
733 
734 	/* Carry any checksum errors through. */
735 	sh->checksum = old_csum ^ old_correct_csum ^ new_csum;
736 
737 	skb_clear_hash(skb);
738 	flow_key->tp.src = sh->source;
739 	flow_key->tp.dst = sh->dest;
740 
741 	return 0;
742 }
743 
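/* Output callback handed to the IP fragmentation code: restores the saved
 * dst, OVS metadata, VLAN tag and L2 header from ovs_frag_data_storage and
 * then sends the fragment out of the stored vport.
 */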
744 static int ovs_vport_output(struct net *net, struct sock *sk,
745 			    struct sk_buff *skb)
746 {
747 	struct ovs_frag_data *data = this_cpu_ptr(&ovs_frag_data_storage);
748 	struct vport *vport = data->vport;
749 
750 	if (skb_cow_head(skb, data->l2_len) < 0) {
751 		kfree_skb(skb);
752 		return -ENOMEM;
753 	}
754 
755 	__skb_dst_copy(skb, data->dst);
756 	*OVS_CB(skb) = data->cb;
757 	skb->inner_protocol = data->inner_protocol;
758 	if (data->vlan_tci & VLAN_CFI_MASK)
759 		__vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci & ~VLAN_CFI_MASK);
760 	else
761 		__vlan_hwaccel_clear_tag(skb);
762 
763 	/* Reconstruct the MAC header.  */
764 	skb_push(skb, data->l2_len);
765 	memcpy(skb->data, &data->l2_data, data->l2_len);
766 	skb_postpush_rcsum(skb, skb->data, data->l2_len);
767 	skb_reset_mac_header(skb);
768 
769 	if (eth_p_mpls(skb->protocol)) {
770 		skb->inner_network_header = skb->network_header;
771 		skb_set_network_header(skb, data->network_offset);
772 		skb_reset_mac_len(skb);
773 	}
774 
775 	ovs_vport_send(vport, skb, data->mac_proto);
776 	return 0;
777 }
778 
779 static unsigned int
780 ovs_dst_get_mtu(const struct dst_entry *dst)
781 {
782 	return dst->dev->mtu;
783 }
784 
785 static struct dst_ops ovs_dst_ops = {
786 	.family = AF_UNSPEC,
787 	.mtu = ovs_dst_get_mtu,
788 };
789 
790 /* prepare_frag() stashes the L2 header and OVS metadata once per
791  * larger-than-MTU frame; ovs_vport_output() then restores them for
792  * each fragment that is produced.
 */
793 static void prepare_frag(struct vport *vport, struct sk_buff *skb,
794 			 u16 orig_network_offset, u8 mac_proto)
795 {
796 	unsigned int hlen = skb_network_offset(skb);
797 	struct ovs_frag_data *data;
798 
799 	data = this_cpu_ptr(&ovs_frag_data_storage);
800 	data->dst = skb->_skb_refdst;
801 	data->vport = vport;
802 	data->cb = *OVS_CB(skb);
803 	data->inner_protocol = skb->inner_protocol;
804 	data->network_offset = orig_network_offset;
805 	if (skb_vlan_tag_present(skb))
806 		data->vlan_tci = skb_vlan_tag_get(skb) | VLAN_CFI_MASK;
807 	else
808 		data->vlan_tci = 0;
809 	data->vlan_proto = skb->vlan_proto;
810 	data->mac_proto = mac_proto;
811 	data->l2_len = hlen;
812 	memcpy(&data->l2_data, skb->data, hlen);
813 
814 	memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
815 	skb_pull(skb, hlen);
816 }
817 
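/* Fragment an over-MRU packet and transmit the pieces via 'vport'.  Only
 * IPv4 and IPv6 payloads can be fragmented; anything else is dropped.
 */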
818 static void ovs_fragment(struct net *net, struct vport *vport,
819 			 struct sk_buff *skb, u16 mru,
820 			 struct sw_flow_key *key)
821 {
822 	u16 orig_network_offset = 0;
823 
824 	if (eth_p_mpls(skb->protocol)) {
825 		orig_network_offset = skb_network_offset(skb);
826 		skb->network_header = skb->inner_network_header;
827 	}
828 
829 	if (skb_network_offset(skb) > MAX_L2_LEN) {
830 		OVS_NLERR(1, "L2 header too long to fragment");
831 		goto err;
832 	}
833 
834 	if (key->eth.type == htons(ETH_P_IP)) {
835 		struct dst_entry ovs_dst;
836 		unsigned long orig_dst;
837 
838 		prepare_frag(vport, skb, orig_network_offset,
839 			     ovs_key_mac_proto(key));
840 		dst_init(&ovs_dst, &ovs_dst_ops, NULL, 1,
841 			 DST_OBSOLETE_NONE, DST_NOCOUNT);
842 		ovs_dst.dev = vport->dev;
843 
844 		orig_dst = skb->_skb_refdst;
845 		skb_dst_set_noref(skb, &ovs_dst);
846 		IPCB(skb)->frag_max_size = mru;
847 
848 		ip_do_fragment(net, skb->sk, skb, ovs_vport_output);
849 		refdst_drop(orig_dst);
850 	} else if (key->eth.type == htons(ETH_P_IPV6)) {
851 		unsigned long orig_dst;
852 		struct rt6_info ovs_rt;
853 
854 		prepare_frag(vport, skb, orig_network_offset,
855 			     ovs_key_mac_proto(key));
856 		memset(&ovs_rt, 0, sizeof(ovs_rt));
857 		dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1,
858 			 DST_OBSOLETE_NONE, DST_NOCOUNT);
859 		ovs_rt.dst.dev = vport->dev;
860 
861 		orig_dst = skb->_skb_refdst;
862 		skb_dst_set_noref(skb, &ovs_rt.dst);
863 		IP6CB(skb)->frag_max_size = mru;
864 
865 		ipv6_stub->ipv6_fragment(net, skb->sk, skb, ovs_vport_output);
866 		refdst_drop(orig_dst);
867 	} else {
868 		WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.",
869 			  ovs_vport_name(vport), ntohs(key->eth.type), mru,
870 			  vport->dev->mtu);
871 		goto err;
872 	}
873 
874 	return;
875 err:
876 	kfree_skb(skb);
877 }
878 
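/* Send 'skb' out of 'out_port', applying any pending truncation first and
 * fragmenting the packet if it exceeds the MRU.
 */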
879 static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
880 		      struct sw_flow_key *key)
881 {
882 	struct vport *vport = ovs_vport_rcu(dp, out_port);
883 
884 	if (likely(vport)) {
885 		u16 mru = OVS_CB(skb)->mru;
886 		u32 cutlen = OVS_CB(skb)->cutlen;
887 
888 		if (unlikely(cutlen > 0)) {
889 			if (skb->len - cutlen > ovs_mac_header_len(key))
890 				pskb_trim(skb, skb->len - cutlen);
891 			else
892 				pskb_trim(skb, ovs_mac_header_len(key));
893 		}
894 
895 		if (likely(!mru ||
896 		           (skb->len <= mru + vport->dev->hard_header_len))) {
897 			ovs_vport_send(vport, skb, ovs_key_mac_proto(key));
898 		} else if (mru <= vport->dev->mtu) {
899 			struct net *net = read_pnet(&dp->net);
900 
901 			ovs_fragment(net, vport, skb, mru, key);
902 		} else {
903 			kfree_skb(skb);
904 		}
905 	} else {
906 		kfree_skb(skb);
907 	}
908 }
909 
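/* Build an OVS_PACKET_CMD_ACTION upcall from the OVS_ACTION_ATTR_USERSPACE
 * attributes and hand the packet, truncated by 'cutlen' bytes, to userspace.
 */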
910 static int output_userspace(struct datapath *dp, struct sk_buff *skb,
911 			    struct sw_flow_key *key, const struct nlattr *attr,
912 			    const struct nlattr *actions, int actions_len,
913 			    uint32_t cutlen)
914 {
915 	struct dp_upcall_info upcall;
916 	const struct nlattr *a;
917 	int rem;
918 
919 	memset(&upcall, 0, sizeof(upcall));
920 	upcall.cmd = OVS_PACKET_CMD_ACTION;
921 	upcall.mru = OVS_CB(skb)->mru;
922 
923 	for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
924 	     a = nla_next(a, &rem)) {
925 		switch (nla_type(a)) {
926 		case OVS_USERSPACE_ATTR_USERDATA:
927 			upcall.userdata = a;
928 			break;
929 
930 		case OVS_USERSPACE_ATTR_PID:
931 			upcall.portid = nla_get_u32(a);
932 			break;
933 
934 		case OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: {
935 			/* Get egress tunnel info. */
936 			struct vport *vport;
937 
938 			vport = ovs_vport_rcu(dp, nla_get_u32(a));
939 			if (vport) {
940 				int err;
941 
942 				err = dev_fill_metadata_dst(vport->dev, skb);
943 				if (!err)
944 					upcall.egress_tun_info = skb_tunnel_info(skb);
945 			}
946 
947 			break;
948 		}
949 
950 		case OVS_USERSPACE_ATTR_ACTIONS: {
951 			/* Include actions. */
952 			upcall.actions = actions;
953 			upcall.actions_len = actions_len;
954 			break;
955 		}
956 
957 		} /* End of switch. */
958 	}
959 
960 	return ovs_dp_upcall(dp, skb, key, &upcall, cutlen);
961 }
962 
963 static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb,
964 				     struct sw_flow_key *key,
965 				     const struct nlattr *attr, bool last)
966 {
967 	/* The first action is always 'OVS_DEC_TTL_ATTR_ARG'. */
968 	struct nlattr *dec_ttl_arg = nla_data(attr);
969 	int rem = nla_len(attr);
970 
971 	if (nla_len(dec_ttl_arg)) {
972 		struct nlattr *actions = nla_next(dec_ttl_arg, &rem);
973 
974 		if (actions)
975 			return clone_execute(dp, skb, key, 0, actions, rem,
976 					     last, false);
977 	}
978 	consume_skb(skb);
979 	return 0;
980 }
981 
982 /* When 'last' is true, sample() should always consume the 'skb'.
983  * Otherwise, sample() should keep 'skb' intact regardless of what
984  * actions are executed within sample().
985  */
986 static int sample(struct datapath *dp, struct sk_buff *skb,
987 		  struct sw_flow_key *key, const struct nlattr *attr,
988 		  bool last)
989 {
990 	struct nlattr *actions;
991 	struct nlattr *sample_arg;
992 	int rem = nla_len(attr);
993 	const struct sample_arg *arg;
994 	bool clone_flow_key;
995 
996 	/* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */
997 	sample_arg = nla_data(attr);
998 	arg = nla_data(sample_arg);
999 	actions = nla_next(sample_arg, &rem);
1000 
1001 	if ((arg->probability != U32_MAX) &&
1002 	    (!arg->probability || prandom_u32() > arg->probability)) {
1003 		if (last)
1004 			consume_skb(skb);
1005 		return 0;
1006 	}
1007 
1008 	clone_flow_key = !arg->exec;
1009 	return clone_execute(dp, skb, key, 0, actions, rem, last,
1010 			     clone_flow_key);
1011 }
1012 
1013 /* When 'last' is true, clone() should always consume the 'skb'.
1014  * Otherwise, clone() should keep 'skb' intact regardless of what
1015  * actions are executed within clone().
1016  */
1017 static int clone(struct datapath *dp, struct sk_buff *skb,
1018 		 struct sw_flow_key *key, const struct nlattr *attr,
1019 		 bool last)
1020 {
1021 	struct nlattr *actions;
1022 	struct nlattr *clone_arg;
1023 	int rem = nla_len(attr);
1024 	bool dont_clone_flow_key;
1025 
1026 	/* The first action is always 'OVS_CLONE_ATTR_ARG'. */
1027 	clone_arg = nla_data(attr);
1028 	dont_clone_flow_key = nla_get_u32(clone_arg);
1029 	actions = nla_next(clone_arg, &rem);
1030 
1031 	return clone_execute(dp, skb, key, 0, actions, rem, last,
1032 			     !dont_clone_flow_key);
1033 }
1034 
1035 static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
1036 			 const struct nlattr *attr)
1037 {
1038 	struct ovs_action_hash *hash_act = nla_data(attr);
1039 	u32 hash = 0;
1040 
1041 	/* OVS_HASH_ALG_L4 is the only possible hash algorithm.  */
1042 	hash = skb_get_hash(skb);
1043 	hash = jhash_1word(hash, hash_act->hash_basis);
1044 	if (!hash)
1045 		hash = 0x1;
1046 
1047 	key->ovs_flow_hash = hash;
1048 }
1049 
1050 static int execute_set_action(struct sk_buff *skb,
1051 			      struct sw_flow_key *flow_key,
1052 			      const struct nlattr *a)
1053 {
1054 	/* Only tunnel set execution is supported without a mask. */
1055 	if (nla_type(a) == OVS_KEY_ATTR_TUNNEL_INFO) {
1056 		struct ovs_tunnel_info *tun = nla_data(a);
1057 
1058 		skb_dst_drop(skb);
1059 		dst_hold((struct dst_entry *)tun->tun_dst);
1060 		skb_dst_set(skb, (struct dst_entry *)tun->tun_dst);
1061 		return 0;
1062 	}
1063 
1064 	return -EINVAL;
1065 }
1066 
1067 /* Mask is at the midpoint of the data. */
1068 #define get_mask(a, type) ((const type)nla_data(a) + 1)
1069 
1070 static int execute_masked_set_action(struct sk_buff *skb,
1071 				     struct sw_flow_key *flow_key,
1072 				     const struct nlattr *a)
1073 {
1074 	int err = 0;
1075 
1076 	switch (nla_type(a)) {
1077 	case OVS_KEY_ATTR_PRIORITY:
1078 		OVS_SET_MASKED(skb->priority, nla_get_u32(a),
1079 			       *get_mask(a, u32 *));
1080 		flow_key->phy.priority = skb->priority;
1081 		break;
1082 
1083 	case OVS_KEY_ATTR_SKB_MARK:
1084 		OVS_SET_MASKED(skb->mark, nla_get_u32(a), *get_mask(a, u32 *));
1085 		flow_key->phy.skb_mark = skb->mark;
1086 		break;
1087 
1088 	case OVS_KEY_ATTR_TUNNEL_INFO:
1089 		/* Masked data not supported for tunnel. */
1090 		err = -EINVAL;
1091 		break;
1092 
1093 	case OVS_KEY_ATTR_ETHERNET:
1094 		err = set_eth_addr(skb, flow_key, nla_data(a),
1095 				   get_mask(a, struct ovs_key_ethernet *));
1096 		break;
1097 
1098 	case OVS_KEY_ATTR_NSH:
1099 		err = set_nsh(skb, flow_key, a);
1100 		break;
1101 
1102 	case OVS_KEY_ATTR_IPV4:
1103 		err = set_ipv4(skb, flow_key, nla_data(a),
1104 			       get_mask(a, struct ovs_key_ipv4 *));
1105 		break;
1106 
1107 	case OVS_KEY_ATTR_IPV6:
1108 		err = set_ipv6(skb, flow_key, nla_data(a),
1109 			       get_mask(a, struct ovs_key_ipv6 *));
1110 		break;
1111 
1112 	case OVS_KEY_ATTR_TCP:
1113 		err = set_tcp(skb, flow_key, nla_data(a),
1114 			      get_mask(a, struct ovs_key_tcp *));
1115 		break;
1116 
1117 	case OVS_KEY_ATTR_UDP:
1118 		err = set_udp(skb, flow_key, nla_data(a),
1119 			      get_mask(a, struct ovs_key_udp *));
1120 		break;
1121 
1122 	case OVS_KEY_ATTR_SCTP:
1123 		err = set_sctp(skb, flow_key, nla_data(a),
1124 			       get_mask(a, struct ovs_key_sctp *));
1125 		break;
1126 
1127 	case OVS_KEY_ATTR_MPLS:
1128 		err = set_mpls(skb, flow_key, nla_data(a), get_mask(a,
1129 								    __be32 *));
1130 		break;
1131 
1132 	case OVS_KEY_ATTR_CT_STATE:
1133 	case OVS_KEY_ATTR_CT_ZONE:
1134 	case OVS_KEY_ATTR_CT_MARK:
1135 	case OVS_KEY_ATTR_CT_LABELS:
1136 	case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4:
1137 	case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6:
1138 		err = -EINVAL;
1139 		break;
1140 	}
1141 
1142 	return err;
1143 }
1144 
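/* Recirculation: revalidate the flow key if needed, then re-run the packet
 * through the datapath with the given recirc_id via clone_execute().
 */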
1145 static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
1146 			  struct sw_flow_key *key,
1147 			  const struct nlattr *a, bool last)
1148 {
1149 	u32 recirc_id;
1150 
1151 	if (!is_flow_key_valid(key)) {
1152 		int err;
1153 
1154 		err = ovs_flow_key_update(skb, key);
1155 		if (err)
1156 			return err;
1157 	}
1158 	BUG_ON(!is_flow_key_valid(key));
1159 
1160 	recirc_id = nla_get_u32(a);
1161 	return clone_execute(dp, skb, key, recirc_id, NULL, 0, last, true);
1162 }
1163 
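/* Compare the packet length (or MRU plus mac_len for reassembled fragments)
 * against 'pkt_len' from the action argument and execute the matching nested
 * action list.
 */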
1164 static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb,
1165 				 struct sw_flow_key *key,
1166 				 const struct nlattr *attr, bool last)
1167 {
1168 	struct ovs_skb_cb *ovs_cb = OVS_CB(skb);
1169 	const struct nlattr *actions, *cpl_arg;
1170 	int len, max_len, rem = nla_len(attr);
1171 	const struct check_pkt_len_arg *arg;
1172 	bool clone_flow_key;
1173 
1174 	/* The first netlink attribute in 'attr' is always
1175 	 * 'OVS_CHECK_PKT_LEN_ATTR_ARG'.
1176 	 */
1177 	cpl_arg = nla_data(attr);
1178 	arg = nla_data(cpl_arg);
1179 
1180 	len = ovs_cb->mru ? ovs_cb->mru + skb->mac_len : skb->len;
1181 	max_len = arg->pkt_len;
1182 
1183 	if ((skb_is_gso(skb) && skb_gso_validate_mac_len(skb, max_len)) ||
1184 	    len <= max_len) {
1185 		/* Second netlink attribute in 'attr' is always
1186 		 * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL'.
1187 		 */
1188 		actions = nla_next(cpl_arg, &rem);
1189 		clone_flow_key = !arg->exec_for_lesser_equal;
1190 	} else {
1191 		/* Third netlink attribute in 'attr' is always
1192 		 * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER'.
1193 		 */
1194 		actions = nla_next(cpl_arg, &rem);
1195 		actions = nla_next(actions, &rem);
1196 		clone_flow_key = !arg->exec_for_greater;
1197 	}
1198 
1199 	return clone_execute(dp, skb, key, 0, nla_data(actions),
1200 			     nla_len(actions), last, clone_flow_key);
1201 }
1202 
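/* Decrement the IPv4 TTL or IPv6 hop limit in place.  Returns -EHOSTUNREACH
 * when the packet would expire, so the caller can run the exception actions.
 */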
1203 static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key)
1204 {
1205 	int err;
1206 
1207 	if (skb->protocol == htons(ETH_P_IPV6)) {
1208 		struct ipv6hdr *nh;
1209 
1210 		err = skb_ensure_writable(skb, skb_network_offset(skb) +
1211 					  sizeof(*nh));
1212 		if (unlikely(err))
1213 			return err;
1214 
1215 		nh = ipv6_hdr(skb);
1216 
1217 		if (nh->hop_limit <= 1)
1218 			return -EHOSTUNREACH;
1219 
1220 		key->ip.ttl = --nh->hop_limit;
1221 	} else {
1222 		struct iphdr *nh;
1223 		u8 old_ttl;
1224 
1225 		err = skb_ensure_writable(skb, skb_network_offset(skb) +
1226 					  sizeof(*nh));
1227 		if (unlikely(err))
1228 			return err;
1229 
1230 		nh = ip_hdr(skb);
1231 		if (nh->ttl <= 1)
1232 			return -EHOSTUNREACH;
1233 
1234 		old_ttl = nh->ttl--;
1235 		csum_replace2(&nh->check, htons(old_ttl << 8),
1236 			      htons(nh->ttl << 8));
1237 		key->ip.ttl = nh->ttl;
1238 	}
1239 	return 0;
1240 }
1241 
1242 /* Execute a list of actions against 'skb'. */
1243 static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
1244 			      struct sw_flow_key *key,
1245 			      const struct nlattr *attr, int len)
1246 {
1247 	const struct nlattr *a;
1248 	int rem;
1249 
1250 	for (a = attr, rem = len; rem > 0;
1251 	     a = nla_next(a, &rem)) {
1252 		int err = 0;
1253 
1254 		switch (nla_type(a)) {
1255 		case OVS_ACTION_ATTR_OUTPUT: {
1256 			int port = nla_get_u32(a);
1257 			struct sk_buff *clone;
1258 
1259 			/* Every output action needs a separate clone
1260 			 * of 'skb'. If the output action is the
1261 			 * last action, cloning can be avoided.
1262 			 */
1263 			if (nla_is_last(a, rem)) {
1264 				do_output(dp, skb, port, key);
1265 				/* 'skb' has been used for output.
1266 				 */
1267 				return 0;
1268 			}
1269 
1270 			clone = skb_clone(skb, GFP_ATOMIC);
1271 			if (clone)
1272 				do_output(dp, clone, port, key);
1273 			OVS_CB(skb)->cutlen = 0;
1274 			break;
1275 		}
1276 
1277 		case OVS_ACTION_ATTR_TRUNC: {
1278 			struct ovs_action_trunc *trunc = nla_data(a);
1279 
1280 			if (skb->len > trunc->max_len)
1281 				OVS_CB(skb)->cutlen = skb->len - trunc->max_len;
1282 			break;
1283 		}
1284 
1285 		case OVS_ACTION_ATTR_USERSPACE:
1286 			output_userspace(dp, skb, key, a, attr,
1287 						     len, OVS_CB(skb)->cutlen);
1288 			OVS_CB(skb)->cutlen = 0;
1289 			break;
1290 
1291 		case OVS_ACTION_ATTR_HASH:
1292 			execute_hash(skb, key, a);
1293 			break;
1294 
1295 		case OVS_ACTION_ATTR_PUSH_MPLS: {
1296 			struct ovs_action_push_mpls *mpls = nla_data(a);
1297 
1298 			err = push_mpls(skb, key, mpls->mpls_lse,
1299 					mpls->mpls_ethertype, skb->mac_len);
1300 			break;
1301 		}
1302 		case OVS_ACTION_ATTR_ADD_MPLS: {
1303 			struct ovs_action_add_mpls *mpls = nla_data(a);
1304 			__u16 mac_len = 0;
1305 
1306 			if (mpls->tun_flags & OVS_MPLS_L3_TUNNEL_FLAG_MASK)
1307 				mac_len = skb->mac_len;
1308 
1309 			err = push_mpls(skb, key, mpls->mpls_lse,
1310 					mpls->mpls_ethertype, mac_len);
1311 			break;
1312 		}
1313 		case OVS_ACTION_ATTR_POP_MPLS:
1314 			err = pop_mpls(skb, key, nla_get_be16(a));
1315 			break;
1316 
1317 		case OVS_ACTION_ATTR_PUSH_VLAN:
1318 			err = push_vlan(skb, key, nla_data(a));
1319 			break;
1320 
1321 		case OVS_ACTION_ATTR_POP_VLAN:
1322 			err = pop_vlan(skb, key);
1323 			break;
1324 
1325 		case OVS_ACTION_ATTR_RECIRC: {
1326 			bool last = nla_is_last(a, rem);
1327 
1328 			err = execute_recirc(dp, skb, key, a, last);
1329 			if (last) {
1330 				/* If this is the last action, the skb has
1331 				 * been consumed or freed.
1332 				 * Return immediately.
1333 				 */
1334 				return err;
1335 			}
1336 			break;
1337 		}
1338 
1339 		case OVS_ACTION_ATTR_SET:
1340 			err = execute_set_action(skb, key, nla_data(a));
1341 			break;
1342 
1343 		case OVS_ACTION_ATTR_SET_MASKED:
1344 		case OVS_ACTION_ATTR_SET_TO_MASKED:
1345 			err = execute_masked_set_action(skb, key, nla_data(a));
1346 			break;
1347 
1348 		case OVS_ACTION_ATTR_SAMPLE: {
1349 			bool last = nla_is_last(a, rem);
1350 
1351 			err = sample(dp, skb, key, a, last);
1352 			if (last)
1353 				return err;
1354 
1355 			break;
1356 		}
1357 
1358 		case OVS_ACTION_ATTR_CT:
1359 			if (!is_flow_key_valid(key)) {
1360 				err = ovs_flow_key_update(skb, key);
1361 				if (err)
1362 					return err;
1363 			}
1364 
1365 			err = ovs_ct_execute(ovs_dp_get_net(dp), skb, key,
1366 					     nla_data(a));
1367 
1368 			/* Hide stolen IP fragments from user space. */
1369 			if (err)
1370 				return err == -EINPROGRESS ? 0 : err;
1371 			break;
1372 
1373 		case OVS_ACTION_ATTR_CT_CLEAR:
1374 			err = ovs_ct_clear(skb, key);
1375 			break;
1376 
1377 		case OVS_ACTION_ATTR_PUSH_ETH:
1378 			err = push_eth(skb, key, nla_data(a));
1379 			break;
1380 
1381 		case OVS_ACTION_ATTR_POP_ETH:
1382 			err = pop_eth(skb, key);
1383 			break;
1384 
1385 		case OVS_ACTION_ATTR_PUSH_NSH: {
1386 			u8 buffer[NSH_HDR_MAX_LEN];
1387 			struct nshhdr *nh = (struct nshhdr *)buffer;
1388 
1389 			err = nsh_hdr_from_nlattr(nla_data(a), nh,
1390 						  NSH_HDR_MAX_LEN);
1391 			if (unlikely(err))
1392 				break;
1393 			err = push_nsh(skb, key, nh);
1394 			break;
1395 		}
1396 
1397 		case OVS_ACTION_ATTR_POP_NSH:
1398 			err = pop_nsh(skb, key);
1399 			break;
1400 
1401 		case OVS_ACTION_ATTR_METER:
1402 			if (ovs_meter_execute(dp, skb, key, nla_get_u32(a))) {
1403 				consume_skb(skb);
1404 				return 0;
1405 			}
1406 			break;
1407 
1408 		case OVS_ACTION_ATTR_CLONE: {
1409 			bool last = nla_is_last(a, rem);
1410 
1411 			err = clone(dp, skb, key, a, last);
1412 			if (last)
1413 				return err;
1414 
1415 			break;
1416 		}
1417 
1418 		case OVS_ACTION_ATTR_CHECK_PKT_LEN: {
1419 			bool last = nla_is_last(a, rem);
1420 
1421 			err = execute_check_pkt_len(dp, skb, key, a, last);
1422 			if (last)
1423 				return err;
1424 
1425 			break;
1426 		}
1427 
1428 		case OVS_ACTION_ATTR_DEC_TTL:
1429 			err = execute_dec_ttl(skb, key);
1430 			if (err == -EHOSTUNREACH) {
1431 				err = dec_ttl_exception_handler(dp, skb, key,
1432 								a, true);
1433 				return err;
1434 			}
1435 			break;
1436 		}
1437 
1438 		if (unlikely(err)) {
1439 			kfree_skb(skb);
1440 			return err;
1441 		}
1442 	}
1443 
1444 	consume_skb(skb);
1445 	return 0;
1446 }
1447 
1448 /* Execute the actions on the clone of the packet. The effect of the
1449  * execution does not affect either the original 'skb' or the original 'key'.
1450  *
1451  * The execution may be deferred if the actions cannot be executed
1452  * immediately.
1453  */
1454 static int clone_execute(struct datapath *dp, struct sk_buff *skb,
1455 			 struct sw_flow_key *key, u32 recirc_id,
1456 			 const struct nlattr *actions, int len,
1457 			 bool last, bool clone_flow_key)
1458 {
1459 	struct deferred_action *da;
1460 	struct sw_flow_key *clone;
1461 
1462 	skb = last ? skb : skb_clone(skb, GFP_ATOMIC);
1463 	if (!skb) {
1464 		/* Out of memory, skip this action.
1465 		 */
1466 		return 0;
1467 	}
1468 
1469 	/* When clone_flow_key is false, the 'key' will not be changed
1470 	 * by the actions, so the 'key' can be used directly.
1471 	 * Otherwise, try to clone key from the next recursion level of
1472 	 * 'flow_keys'. If clone is successful, execute the actions
1473 	 * without deferring.
1474 	 */
1475 	clone = clone_flow_key ? clone_key(key) : key;
1476 	if (clone) {
1477 		int err = 0;
1478 
1479 		if (actions) { /* Sample action */
1480 			if (clone_flow_key)
1481 				__this_cpu_inc(exec_actions_level);
1482 
1483 			err = do_execute_actions(dp, skb, clone,
1484 						 actions, len);
1485 
1486 			if (clone_flow_key)
1487 				__this_cpu_dec(exec_actions_level);
1488 		} else { /* Recirc action */
1489 			clone->recirc_id = recirc_id;
1490 			ovs_dp_process_packet(skb, clone);
1491 		}
1492 		return err;
1493 	}
1494 
1495 	/* Out of 'flow_keys' space. Defer actions */
1496 	da = add_deferred_actions(skb, key, actions, len);
1497 	if (da) {
1498 		if (!actions) { /* Recirc action */
1499 			key = &da->pkt_key;
1500 			key->recirc_id = recirc_id;
1501 		}
1502 	} else {
1503 		/* Out of per CPU action FIFO space. Drop the 'skb' and
1504 		 * log an error.
1505 		 */
1506 		kfree_skb(skb);
1507 
1508 		if (net_ratelimit()) {
1509 			if (actions) { /* Sample action */
1510 				pr_warn("%s: deferred action limit reached, drop sample action\n",
1511 					ovs_dp_name(dp));
1512 			} else {  /* Recirc action */
1513 				pr_warn("%s: deferred action limit reached, drop recirc action\n",
1514 					ovs_dp_name(dp));
1515 			}
1516 		}
1517 	}
1518 	return 0;
1519 }
1520 
1521 static void process_deferred_actions(struct datapath *dp)
1522 {
1523 	struct action_fifo *fifo = this_cpu_ptr(action_fifos);
1524 
1525 	/* Do not touch the FIFO if there are no deferred actions. */
1526 	if (action_fifo_is_empty(fifo))
1527 		return;
1528 
1529 	/* Finish executing all deferred actions. */
1530 	do {
1531 		struct deferred_action *da = action_fifo_get(fifo);
1532 		struct sk_buff *skb = da->skb;
1533 		struct sw_flow_key *key = &da->pkt_key;
1534 		const struct nlattr *actions = da->actions;
1535 		int actions_len = da->actions_len;
1536 
1537 		if (actions)
1538 			do_execute_actions(dp, skb, key, actions, actions_len);
1539 		else
1540 			ovs_dp_process_packet(skb, key);
1541 	} while (!action_fifo_is_empty(fifo));
1542 
1543 	/* Reset FIFO for the next packet.  */
1544 	action_fifo_init(fifo);
1545 }
1546 
1547 /* Execute a list of actions against 'skb'. */
1548 int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
1549 			const struct sw_flow_actions *acts,
1550 			struct sw_flow_key *key)
1551 {
1552 	int err, level;
1553 
1554 	level = __this_cpu_inc_return(exec_actions_level);
1555 	if (unlikely(level > OVS_RECURSION_LIMIT)) {
1556 		net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n",
1557 				     ovs_dp_name(dp));
1558 		kfree_skb(skb);
1559 		err = -ENETDOWN;
1560 		goto out;
1561 	}
1562 
1563 	OVS_CB(skb)->acts_origlen = acts->orig_len;
1564 	err = do_execute_actions(dp, skb, key,
1565 				 acts->actions, acts->actions_len);
1566 
1567 	if (level == 1)
1568 		process_deferred_actions(dp);
1569 
1570 out:
1571 	__this_cpu_dec(exec_actions_level);
1572 	return err;
1573 }
1574 
1575 int action_fifos_init(void)
1576 {
1577 	action_fifos = alloc_percpu(struct action_fifo);
1578 	if (!action_fifos)
1579 		return -ENOMEM;
1580 
1581 	flow_keys = alloc_percpu(struct action_flow_keys);
1582 	if (!flow_keys) {
1583 		free_percpu(action_fifos);
1584 		return -ENOMEM;
1585 	}
1586 
1587 	return 0;
1588 }
1589 
1590 void action_fifos_exit(void)
1591 {
1592 	free_percpu(action_fifos);
1593 	free_percpu(flow_keys);
1594 }
1595