xref: /openbmc/linux/net/openvswitch/datapath.c (revision 0ef9d78b)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2007-2014 Nicira, Inc.
4  */
5 
6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7 
8 #include <linux/init.h>
9 #include <linux/module.h>
10 #include <linux/if_arp.h>
11 #include <linux/if_vlan.h>
12 #include <linux/in.h>
13 #include <linux/ip.h>
14 #include <linux/jhash.h>
15 #include <linux/delay.h>
16 #include <linux/time.h>
17 #include <linux/etherdevice.h>
18 #include <linux/genetlink.h>
19 #include <linux/kernel.h>
20 #include <linux/kthread.h>
21 #include <linux/mutex.h>
22 #include <linux/percpu.h>
23 #include <linux/rcupdate.h>
24 #include <linux/tcp.h>
25 #include <linux/udp.h>
26 #include <linux/ethtool.h>
27 #include <linux/wait.h>
28 #include <asm/div64.h>
29 #include <linux/highmem.h>
30 #include <linux/netfilter_bridge.h>
31 #include <linux/netfilter_ipv4.h>
32 #include <linux/inetdevice.h>
33 #include <linux/list.h>
34 #include <linux/openvswitch.h>
35 #include <linux/rculist.h>
36 #include <linux/dmi.h>
37 #include <net/genetlink.h>
38 #include <net/gso.h>
39 #include <net/net_namespace.h>
40 #include <net/netns/generic.h>
41 #include <net/pkt_cls.h>
42 
43 #include "datapath.h"
44 #include "drop.h"
45 #include "flow.h"
46 #include "flow_table.h"
47 #include "flow_netlink.h"
48 #include "meter.h"
49 #include "openvswitch_trace.h"
50 #include "vport-internal_dev.h"
51 #include "vport-netdev.h"
52 
53 unsigned int ovs_net_id __read_mostly;
54 
55 static struct genl_family dp_packet_genl_family;
56 static struct genl_family dp_flow_genl_family;
57 static struct genl_family dp_datapath_genl_family;
58 
59 static const struct nla_policy flow_policy[];
60 
61 static const struct genl_multicast_group ovs_dp_flow_multicast_group = {
62 	.name = OVS_FLOW_MCGROUP,
63 };
64 
65 static const struct genl_multicast_group ovs_dp_datapath_multicast_group = {
66 	.name = OVS_DATAPATH_MCGROUP,
67 };
68 
69 static const struct genl_multicast_group ovs_dp_vport_multicast_group = {
70 	.name = OVS_VPORT_MCGROUP,
71 };
72 
73 /* Check if need to build a reply message.
74  * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
75 static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
76 			    unsigned int group)
77 {
78 	return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
79 	       genl_has_listeners(family, genl_info_net(info), group);
80 }
81 
82 static void ovs_notify(struct genl_family *family,
83 		       struct sk_buff *skb, struct genl_info *info)
84 {
85 	genl_notify(family, skb, info, 0, GFP_KERNEL);
86 }
87 
88 /**
89  * DOC: Locking:
90  *
 * All writes, e.g. writes to device state (add/remove datapath, port, set
 * operations on vports, etc.) and writes to other state (flow table
 * modifications, setting miscellaneous datapath parameters, etc.), are
 * protected by ovs_mutex, which is taken with ovs_lock().
95  *
96  * Reads are protected by RCU.
97  *
98  * There are a few special cases (mostly stats) that have their own
99  * synchronization but they nest under all of above and don't interact with
100  * each other.
101  *
102  * The RTNL lock nests inside ovs_mutex.
103  */
104 
105 static DEFINE_MUTEX(ovs_mutex);
106 
107 void ovs_lock(void)
108 {
109 	mutex_lock(&ovs_mutex);
110 }
111 
112 void ovs_unlock(void)
113 {
114 	mutex_unlock(&ovs_mutex);
115 }
116 
#ifdef CONFIG_LOCKDEP
/* Tell whether lockdep considers ovs_mutex to be held.
 *
 * When debug_locks is zero the function answers 1 unconditionally instead
 * of consulting lockdep.
 */
int lockdep_ovsl_is_held(void)
{
	if (!debug_locks)
		return 1;

	return lockdep_is_held(&ovs_mutex);
}
#endif
126 
/* Prototypes for static functions that are used before their definition. */
static struct vport *new_vport(const struct vport_parms *parms);

static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
			     const struct sw_flow_key *key,
			     const struct dp_upcall_info *upcall_info,
			     uint32_t cutlen);

static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
				  const struct sw_flow_key *key,
				  const struct dp_upcall_info *upcall_info,
				  uint32_t cutlen);

static void ovs_dp_masks_rebalance(struct work_struct *work);

static int ovs_dp_set_upcall_portids(struct datapath *dp,
				     const struct nlattr *ids);
140 
141 /* Must be called with rcu_read_lock or ovs_mutex. */
142 const char *ovs_dp_name(const struct datapath *dp)
143 {
144 	struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
145 	return ovs_vport_name(vport);
146 }
147 
148 static int get_dpifindex(const struct datapath *dp)
149 {
150 	struct vport *local;
151 	int ifindex;
152 
153 	rcu_read_lock();
154 
155 	local = ovs_vport_rcu(dp, OVSP_LOCAL);
156 	if (local)
157 		ifindex = local->dev->ifindex;
158 	else
159 		ifindex = 0;
160 
161 	rcu_read_unlock();
162 
163 	return ifindex;
164 }
165 
166 static void destroy_dp_rcu(struct rcu_head *rcu)
167 {
168 	struct datapath *dp = container_of(rcu, struct datapath, rcu);
169 
170 	ovs_flow_tbl_destroy(&dp->table);
171 	free_percpu(dp->stats_percpu);
172 	kfree(dp->ports);
173 	ovs_meters_exit(dp);
174 	kfree(rcu_dereference_raw(dp->upcall_portids));
175 	kfree(dp);
176 }
177 
178 static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
179 					    u16 port_no)
180 {
181 	return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
182 }
183 
184 /* Called with ovs_mutex or RCU read lock. */
185 struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
186 {
187 	struct vport *vport;
188 	struct hlist_head *head;
189 
190 	head = vport_hash_bucket(dp, port_no);
191 	hlist_for_each_entry_rcu(vport, head, dp_hash_node,
192 				 lockdep_ovsl_is_held()) {
193 		if (vport->port_no == port_no)
194 			return vport;
195 	}
196 	return NULL;
197 }
198 
199 /* Called with ovs_mutex. */
200 static struct vport *new_vport(const struct vport_parms *parms)
201 {
202 	struct vport *vport;
203 
204 	vport = ovs_vport_add(parms);
205 	if (!IS_ERR(vport)) {
206 		struct datapath *dp = parms->dp;
207 		struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);
208 
209 		hlist_add_head_rcu(&vport->dp_hash_node, head);
210 	}
211 	return vport;
212 }
213 
214 static void ovs_vport_update_upcall_stats(struct sk_buff *skb,
215 					  const struct dp_upcall_info *upcall_info,
216 					  bool upcall_result)
217 {
218 	struct vport *p = OVS_CB(skb)->input_vport;
219 	struct vport_upcall_stats_percpu *stats;
220 
221 	if (upcall_info->cmd != OVS_PACKET_CMD_MISS &&
222 	    upcall_info->cmd != OVS_PACKET_CMD_ACTION)
223 		return;
224 
225 	stats = this_cpu_ptr(p->upcall_stats);
226 	u64_stats_update_begin(&stats->syncp);
227 	if (upcall_result)
228 		u64_stats_inc(&stats->n_success);
229 	else
230 		u64_stats_inc(&stats->n_fail);
231 	u64_stats_update_end(&stats->syncp);
232 }
233 
234 void ovs_dp_detach_port(struct vport *p)
235 {
236 	ASSERT_OVSL();
237 
238 	/* First drop references to device. */
239 	hlist_del_rcu(&p->dp_hash_node);
240 
241 	/* Then destroy it. */
242 	ovs_vport_del(p);
243 }
244 
245 /* Must be called with rcu_read_lock. */
246 void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
247 {
248 	const struct vport *p = OVS_CB(skb)->input_vport;
249 	struct datapath *dp = p->dp;
250 	struct sw_flow *flow;
251 	struct sw_flow_actions *sf_acts;
252 	struct dp_stats_percpu *stats;
253 	u64 *stats_counter;
254 	u32 n_mask_hit;
255 	u32 n_cache_hit;
256 	int error;
257 
258 	stats = this_cpu_ptr(dp->stats_percpu);
259 
260 	/* Look up flow. */
261 	flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
262 					 &n_mask_hit, &n_cache_hit);
263 	if (unlikely(!flow)) {
264 		struct dp_upcall_info upcall;
265 
266 		memset(&upcall, 0, sizeof(upcall));
267 		upcall.cmd = OVS_PACKET_CMD_MISS;
268 
269 		if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
270 			upcall.portid =
271 			    ovs_dp_get_upcall_portid(dp, smp_processor_id());
272 		else
273 			upcall.portid = ovs_vport_find_upcall_portid(p, skb);
274 
275 		upcall.mru = OVS_CB(skb)->mru;
276 		error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
277 		switch (error) {
278 		case 0:
279 		case -EAGAIN:
280 		case -ERESTARTSYS:
281 		case -EINTR:
282 			consume_skb(skb);
283 			break;
284 		default:
285 			kfree_skb(skb);
286 			break;
287 		}
288 		stats_counter = &stats->n_missed;
289 		goto out;
290 	}
291 
292 	ovs_flow_stats_update(flow, key->tp.flags, skb);
293 	sf_acts = rcu_dereference(flow->sf_acts);
294 	error = ovs_execute_actions(dp, skb, sf_acts, key);
295 	if (unlikely(error))
296 		net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n",
297 				    ovs_dp_name(dp), error);
298 
299 	stats_counter = &stats->n_hit;
300 
301 out:
302 	/* Update datapath statistics. */
303 	u64_stats_update_begin(&stats->syncp);
304 	(*stats_counter)++;
305 	stats->n_mask_hit += n_mask_hit;
306 	stats->n_cache_hit += n_cache_hit;
307 	u64_stats_update_end(&stats->syncp);
308 }
309 
310 int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
311 		  const struct sw_flow_key *key,
312 		  const struct dp_upcall_info *upcall_info,
313 		  uint32_t cutlen)
314 {
315 	struct dp_stats_percpu *stats;
316 	int err;
317 
318 	if (trace_ovs_dp_upcall_enabled())
319 		trace_ovs_dp_upcall(dp, skb, key, upcall_info);
320 
321 	if (upcall_info->portid == 0) {
322 		err = -ENOTCONN;
323 		goto err;
324 	}
325 
326 	if (!skb_is_gso(skb))
327 		err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
328 	else
329 		err = queue_gso_packets(dp, skb, key, upcall_info, cutlen);
330 
331 	ovs_vport_update_upcall_stats(skb, upcall_info, !err);
332 	if (err)
333 		goto err;
334 
335 	return 0;
336 
337 err:
338 	stats = this_cpu_ptr(dp->stats_percpu);
339 
340 	u64_stats_update_begin(&stats->syncp);
341 	stats->n_lost++;
342 	u64_stats_update_end(&stats->syncp);
343 
344 	return err;
345 }
346 
347 static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
348 			     const struct sw_flow_key *key,
349 			     const struct dp_upcall_info *upcall_info,
350 			     uint32_t cutlen)
351 {
352 	unsigned int gso_type = skb_shinfo(skb)->gso_type;
353 	struct sw_flow_key later_key;
354 	struct sk_buff *segs, *nskb;
355 	int err;
356 
357 	BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_GSO_CB_OFFSET);
358 	segs = __skb_gso_segment(skb, NETIF_F_SG, false);
359 	if (IS_ERR(segs))
360 		return PTR_ERR(segs);
361 	if (segs == NULL)
362 		return -EINVAL;
363 
364 	if (gso_type & SKB_GSO_UDP) {
365 		/* The initial flow key extracted by ovs_flow_key_extract()
366 		 * in this case is for a first fragment, so we need to
367 		 * properly mark later fragments.
368 		 */
369 		later_key = *key;
370 		later_key.ip.frag = OVS_FRAG_TYPE_LATER;
371 	}
372 
373 	/* Queue all of the segments. */
374 	skb_list_walk_safe(segs, skb, nskb) {
375 		if (gso_type & SKB_GSO_UDP && skb != segs)
376 			key = &later_key;
377 
378 		err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
379 		if (err)
380 			break;
381 
382 	}
383 
384 	/* Free all of the segments. */
385 	skb_list_walk_safe(segs, skb, nskb) {
386 		if (err)
387 			kfree_skb(skb);
388 		else
389 			consume_skb(skb);
390 	}
391 	return err;
392 }
393 
394 static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
395 			      unsigned int hdrlen, int actions_attrlen)
396 {
397 	size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
398 		+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
399 		+ nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
400 		+ nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */
401 		+ nla_total_size(sizeof(u64)); /* OVS_PACKET_ATTR_HASH */
402 
403 	/* OVS_PACKET_ATTR_USERDATA */
404 	if (upcall_info->userdata)
405 		size += NLA_ALIGN(upcall_info->userdata->nla_len);
406 
407 	/* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
408 	if (upcall_info->egress_tun_info)
409 		size += nla_total_size(ovs_tun_key_attr_size());
410 
411 	/* OVS_PACKET_ATTR_ACTIONS */
412 	if (upcall_info->actions_len)
413 		size += nla_total_size(actions_attrlen);
414 
415 	/* OVS_PACKET_ATTR_MRU */
416 	if (upcall_info->mru)
417 		size += nla_total_size(sizeof(upcall_info->mru));
418 
419 	return size;
420 }
421 
422 static void pad_packet(struct datapath *dp, struct sk_buff *skb)
423 {
424 	if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
425 		size_t plen = NLA_ALIGN(skb->len) - skb->len;
426 
427 		if (plen > 0)
428 			skb_put_zero(skb, plen);
429 	}
430 }
431 
432 static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
433 				  const struct sw_flow_key *key,
434 				  const struct dp_upcall_info *upcall_info,
435 				  uint32_t cutlen)
436 {
437 	struct ovs_header *upcall;
438 	struct sk_buff *nskb = NULL;
439 	struct sk_buff *user_skb = NULL; /* to be queued to userspace */
440 	struct nlattr *nla;
441 	size_t len;
442 	unsigned int hlen;
443 	int err, dp_ifindex;
444 	u64 hash;
445 
446 	dp_ifindex = get_dpifindex(dp);
447 	if (!dp_ifindex)
448 		return -ENODEV;
449 
450 	if (skb_vlan_tag_present(skb)) {
451 		nskb = skb_clone(skb, GFP_ATOMIC);
452 		if (!nskb)
453 			return -ENOMEM;
454 
455 		nskb = __vlan_hwaccel_push_inside(nskb);
456 		if (!nskb)
457 			return -ENOMEM;
458 
459 		skb = nskb;
460 	}
461 
462 	if (nla_attr_size(skb->len) > USHRT_MAX) {
463 		err = -EFBIG;
464 		goto out;
465 	}
466 
467 	/* Complete checksum if needed */
468 	if (skb->ip_summed == CHECKSUM_PARTIAL &&
469 	    (err = skb_csum_hwoffload_help(skb, 0)))
470 		goto out;
471 
472 	/* Older versions of OVS user space enforce alignment of the last
473 	 * Netlink attribute to NLA_ALIGNTO which would require extensive
474 	 * padding logic. Only perform zerocopy if padding is not required.
475 	 */
476 	if (dp->user_features & OVS_DP_F_UNALIGNED)
477 		hlen = skb_zerocopy_headlen(skb);
478 	else
479 		hlen = skb->len;
480 
481 	len = upcall_msg_size(upcall_info, hlen - cutlen,
482 			      OVS_CB(skb)->acts_origlen);
483 	user_skb = genlmsg_new(len, GFP_ATOMIC);
484 	if (!user_skb) {
485 		err = -ENOMEM;
486 		goto out;
487 	}
488 
489 	upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
490 			     0, upcall_info->cmd);
491 	if (!upcall) {
492 		err = -EINVAL;
493 		goto out;
494 	}
495 	upcall->dp_ifindex = dp_ifindex;
496 
497 	err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
498 	if (err)
499 		goto out;
500 
501 	if (upcall_info->userdata)
502 		__nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
503 			  nla_len(upcall_info->userdata),
504 			  nla_data(upcall_info->userdata));
505 
506 	if (upcall_info->egress_tun_info) {
507 		nla = nla_nest_start_noflag(user_skb,
508 					    OVS_PACKET_ATTR_EGRESS_TUN_KEY);
509 		if (!nla) {
510 			err = -EMSGSIZE;
511 			goto out;
512 		}
513 		err = ovs_nla_put_tunnel_info(user_skb,
514 					      upcall_info->egress_tun_info);
515 		if (err)
516 			goto out;
517 
518 		nla_nest_end(user_skb, nla);
519 	}
520 
521 	if (upcall_info->actions_len) {
522 		nla = nla_nest_start_noflag(user_skb, OVS_PACKET_ATTR_ACTIONS);
523 		if (!nla) {
524 			err = -EMSGSIZE;
525 			goto out;
526 		}
527 		err = ovs_nla_put_actions(upcall_info->actions,
528 					  upcall_info->actions_len,
529 					  user_skb);
530 		if (!err)
531 			nla_nest_end(user_skb, nla);
532 		else
533 			nla_nest_cancel(user_skb, nla);
534 	}
535 
536 	/* Add OVS_PACKET_ATTR_MRU */
537 	if (upcall_info->mru &&
538 	    nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU, upcall_info->mru)) {
539 		err = -ENOBUFS;
540 		goto out;
541 	}
542 
543 	/* Add OVS_PACKET_ATTR_LEN when packet is truncated */
544 	if (cutlen > 0 &&
545 	    nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN, skb->len)) {
546 		err = -ENOBUFS;
547 		goto out;
548 	}
549 
550 	/* Add OVS_PACKET_ATTR_HASH */
551 	hash = skb_get_hash_raw(skb);
552 	if (skb->sw_hash)
553 		hash |= OVS_PACKET_HASH_SW_BIT;
554 
555 	if (skb->l4_hash)
556 		hash |= OVS_PACKET_HASH_L4_BIT;
557 
558 	if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof (u64), &hash)) {
559 		err = -ENOBUFS;
560 		goto out;
561 	}
562 
563 	/* Only reserve room for attribute header, packet data is added
564 	 * in skb_zerocopy() */
565 	if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
566 		err = -ENOBUFS;
567 		goto out;
568 	}
569 	nla->nla_len = nla_attr_size(skb->len - cutlen);
570 
571 	err = skb_zerocopy(user_skb, skb, skb->len - cutlen, hlen);
572 	if (err)
573 		goto out;
574 
575 	/* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
576 	pad_packet(dp, user_skb);
577 
578 	((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;
579 
580 	err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
581 	user_skb = NULL;
582 out:
583 	if (err)
584 		skb_tx_error(skb);
585 	consume_skb(user_skb);
586 	consume_skb(nskb);
587 
588 	return err;
589 }
590 
591 static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
592 {
593 	struct ovs_header *ovs_header = genl_info_userhdr(info);
594 	struct net *net = sock_net(skb->sk);
595 	struct nlattr **a = info->attrs;
596 	struct sw_flow_actions *acts;
597 	struct sk_buff *packet;
598 	struct sw_flow *flow;
599 	struct sw_flow_actions *sf_acts;
600 	struct datapath *dp;
601 	struct vport *input_vport;
602 	u16 mru = 0;
603 	u64 hash;
604 	int len;
605 	int err;
606 	bool log = !a[OVS_PACKET_ATTR_PROBE];
607 
608 	err = -EINVAL;
609 	if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
610 	    !a[OVS_PACKET_ATTR_ACTIONS])
611 		goto err;
612 
613 	len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
614 	packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
615 	err = -ENOMEM;
616 	if (!packet)
617 		goto err;
618 	skb_reserve(packet, NET_IP_ALIGN);
619 
620 	nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);
621 
622 	/* Set packet's mru */
623 	if (a[OVS_PACKET_ATTR_MRU]) {
624 		mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
625 		packet->ignore_df = 1;
626 	}
627 	OVS_CB(packet)->mru = mru;
628 
629 	if (a[OVS_PACKET_ATTR_HASH]) {
630 		hash = nla_get_u64(a[OVS_PACKET_ATTR_HASH]);
631 
632 		__skb_set_hash(packet, hash & 0xFFFFFFFFULL,
633 			       !!(hash & OVS_PACKET_HASH_SW_BIT),
634 			       !!(hash & OVS_PACKET_HASH_L4_BIT));
635 	}
636 
637 	/* Build an sw_flow for sending this packet. */
638 	flow = ovs_flow_alloc();
639 	err = PTR_ERR(flow);
640 	if (IS_ERR(flow))
641 		goto err_kfree_skb;
642 
643 	err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY],
644 					     packet, &flow->key, log);
645 	if (err)
646 		goto err_flow_free;
647 
648 	err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS],
649 				   &flow->key, &acts, log);
650 	if (err)
651 		goto err_flow_free;
652 
653 	rcu_assign_pointer(flow->sf_acts, acts);
654 	packet->priority = flow->key.phy.priority;
655 	packet->mark = flow->key.phy.skb_mark;
656 
657 	rcu_read_lock();
658 	dp = get_dp_rcu(net, ovs_header->dp_ifindex);
659 	err = -ENODEV;
660 	if (!dp)
661 		goto err_unlock;
662 
663 	input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port);
664 	if (!input_vport)
665 		input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);
666 
667 	if (!input_vport)
668 		goto err_unlock;
669 
670 	packet->dev = input_vport->dev;
671 	OVS_CB(packet)->input_vport = input_vport;
672 	sf_acts = rcu_dereference(flow->sf_acts);
673 
674 	local_bh_disable();
675 	err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
676 	local_bh_enable();
677 	rcu_read_unlock();
678 
679 	ovs_flow_free(flow, false);
680 	return err;
681 
682 err_unlock:
683 	rcu_read_unlock();
684 err_flow_free:
685 	ovs_flow_free(flow, false);
686 err_kfree_skb:
687 	kfree_skb(packet);
688 err:
689 	return err;
690 }
691 
692 static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
693 	[OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
694 	[OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
695 	[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
696 	[OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
697 	[OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
698 	[OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 },
699 };
700 
701 static const struct genl_small_ops dp_packet_genl_ops[] = {
702 	{ .cmd = OVS_PACKET_CMD_EXECUTE,
703 	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
704 	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
705 	  .doit = ovs_packet_cmd_execute
706 	}
707 };
708 
709 static struct genl_family dp_packet_genl_family __ro_after_init = {
710 	.hdrsize = sizeof(struct ovs_header),
711 	.name = OVS_PACKET_FAMILY,
712 	.version = OVS_PACKET_VERSION,
713 	.maxattr = OVS_PACKET_ATTR_MAX,
714 	.policy = packet_policy,
715 	.netnsok = true,
716 	.parallel_ops = true,
717 	.small_ops = dp_packet_genl_ops,
718 	.n_small_ops = ARRAY_SIZE(dp_packet_genl_ops),
719 	.resv_start_op = OVS_PACKET_CMD_EXECUTE + 1,
720 	.module = THIS_MODULE,
721 };
722 
723 static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
724 			 struct ovs_dp_megaflow_stats *mega_stats)
725 {
726 	int i;
727 
728 	memset(mega_stats, 0, sizeof(*mega_stats));
729 
730 	stats->n_flows = ovs_flow_tbl_count(&dp->table);
731 	mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);
732 
733 	stats->n_hit = stats->n_missed = stats->n_lost = 0;
734 
735 	for_each_possible_cpu(i) {
736 		const struct dp_stats_percpu *percpu_stats;
737 		struct dp_stats_percpu local_stats;
738 		unsigned int start;
739 
740 		percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
741 
742 		do {
743 			start = u64_stats_fetch_begin(&percpu_stats->syncp);
744 			local_stats = *percpu_stats;
745 		} while (u64_stats_fetch_retry(&percpu_stats->syncp, start));
746 
747 		stats->n_hit += local_stats.n_hit;
748 		stats->n_missed += local_stats.n_missed;
749 		stats->n_lost += local_stats.n_lost;
750 		mega_stats->n_mask_hit += local_stats.n_mask_hit;
751 		mega_stats->n_cache_hit += local_stats.n_cache_hit;
752 	}
753 }
754 
755 static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags)
756 {
757 	return ovs_identifier_is_ufid(sfid) &&
758 	       !(ufid_flags & OVS_UFID_F_OMIT_KEY);
759 }
760 
761 static bool should_fill_mask(uint32_t ufid_flags)
762 {
763 	return !(ufid_flags & OVS_UFID_F_OMIT_MASK);
764 }
765 
766 static bool should_fill_actions(uint32_t ufid_flags)
767 {
768 	return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS);
769 }
770 
771 static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
772 				    const struct sw_flow_id *sfid,
773 				    uint32_t ufid_flags)
774 {
775 	size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));
776 
777 	/* OVS_FLOW_ATTR_UFID, or unmasked flow key as fallback
778 	 * see ovs_nla_put_identifier()
779 	 */
780 	if (sfid && ovs_identifier_is_ufid(sfid))
781 		len += nla_total_size(sfid->ufid_len);
782 	else
783 		len += nla_total_size(ovs_key_attr_size());
784 
785 	/* OVS_FLOW_ATTR_KEY */
786 	if (!sfid || should_fill_key(sfid, ufid_flags))
787 		len += nla_total_size(ovs_key_attr_size());
788 
789 	/* OVS_FLOW_ATTR_MASK */
790 	if (should_fill_mask(ufid_flags))
791 		len += nla_total_size(ovs_key_attr_size());
792 
793 	/* OVS_FLOW_ATTR_ACTIONS */
794 	if (should_fill_actions(ufid_flags))
795 		len += nla_total_size(acts->orig_len);
796 
797 	return len
798 		+ nla_total_size_64bit(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
799 		+ nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
800 		+ nla_total_size_64bit(8); /* OVS_FLOW_ATTR_USED */
801 }
802 
803 /* Called with ovs_mutex or RCU read lock. */
804 static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
805 				   struct sk_buff *skb)
806 {
807 	struct ovs_flow_stats stats;
808 	__be16 tcp_flags;
809 	unsigned long used;
810 
811 	ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);
812 
813 	if (used &&
814 	    nla_put_u64_64bit(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used),
815 			      OVS_FLOW_ATTR_PAD))
816 		return -EMSGSIZE;
817 
818 	if (stats.n_packets &&
819 	    nla_put_64bit(skb, OVS_FLOW_ATTR_STATS,
820 			  sizeof(struct ovs_flow_stats), &stats,
821 			  OVS_FLOW_ATTR_PAD))
822 		return -EMSGSIZE;
823 
824 	if ((u8)ntohs(tcp_flags) &&
825 	     nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
826 		return -EMSGSIZE;
827 
828 	return 0;
829 }
830 
831 /* Called with ovs_mutex or RCU read lock. */
832 static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
833 				     struct sk_buff *skb, int skb_orig_len)
834 {
835 	struct nlattr *start;
836 	int err;
837 
838 	/* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
839 	 * this is the first flow to be dumped into 'skb'.  This is unusual for
840 	 * Netlink but individual action lists can be longer than
841 	 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
842 	 * The userspace caller can always fetch the actions separately if it
843 	 * really wants them.  (Most userspace callers in fact don't care.)
844 	 *
845 	 * This can only fail for dump operations because the skb is always
846 	 * properly sized for single flows.
847 	 */
848 	start = nla_nest_start_noflag(skb, OVS_FLOW_ATTR_ACTIONS);
849 	if (start) {
850 		const struct sw_flow_actions *sf_acts;
851 
852 		sf_acts = rcu_dereference_ovsl(flow->sf_acts);
853 		err = ovs_nla_put_actions(sf_acts->actions,
854 					  sf_acts->actions_len, skb);
855 
856 		if (!err)
857 			nla_nest_end(skb, start);
858 		else {
859 			if (skb_orig_len)
860 				return err;
861 
862 			nla_nest_cancel(skb, start);
863 		}
864 	} else if (skb_orig_len) {
865 		return -EMSGSIZE;
866 	}
867 
868 	return 0;
869 }
870 
871 /* Called with ovs_mutex or RCU read lock. */
872 static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
873 				  struct sk_buff *skb, u32 portid,
874 				  u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
875 {
876 	const int skb_orig_len = skb->len;
877 	struct ovs_header *ovs_header;
878 	int err;
879 
880 	ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
881 				 flags, cmd);
882 	if (!ovs_header)
883 		return -EMSGSIZE;
884 
885 	ovs_header->dp_ifindex = dp_ifindex;
886 
887 	err = ovs_nla_put_identifier(flow, skb);
888 	if (err)
889 		goto error;
890 
891 	if (should_fill_key(&flow->id, ufid_flags)) {
892 		err = ovs_nla_put_masked_key(flow, skb);
893 		if (err)
894 			goto error;
895 	}
896 
897 	if (should_fill_mask(ufid_flags)) {
898 		err = ovs_nla_put_mask(flow, skb);
899 		if (err)
900 			goto error;
901 	}
902 
903 	err = ovs_flow_cmd_fill_stats(flow, skb);
904 	if (err)
905 		goto error;
906 
907 	if (should_fill_actions(ufid_flags)) {
908 		err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
909 		if (err)
910 			goto error;
911 	}
912 
913 	genlmsg_end(skb, ovs_header);
914 	return 0;
915 
916 error:
917 	genlmsg_cancel(skb, ovs_header);
918 	return err;
919 }
920 
921 /* May not be called with RCU read lock. */
922 static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
923 					       const struct sw_flow_id *sfid,
924 					       struct genl_info *info,
925 					       bool always,
926 					       uint32_t ufid_flags)
927 {
928 	struct sk_buff *skb;
929 	size_t len;
930 
931 	if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0))
932 		return NULL;
933 
934 	len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
935 	skb = genlmsg_new(len, GFP_KERNEL);
936 	if (!skb)
937 		return ERR_PTR(-ENOMEM);
938 
939 	return skb;
940 }
941 
942 /* Called with ovs_mutex. */
943 static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
944 					       int dp_ifindex,
945 					       struct genl_info *info, u8 cmd,
946 					       bool always, u32 ufid_flags)
947 {
948 	struct sk_buff *skb;
949 	int retval;
950 
951 	skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
952 				      &flow->id, info, always, ufid_flags);
953 	if (IS_ERR_OR_NULL(skb))
954 		return skb;
955 
956 	retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
957 					info->snd_portid, info->snd_seq, 0,
958 					cmd, ufid_flags);
959 	if (WARN_ON_ONCE(retval < 0)) {
960 		kfree_skb(skb);
961 		skb = ERR_PTR(retval);
962 	}
963 	return skb;
964 }
965 
966 static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
967 {
968 	struct net *net = sock_net(skb->sk);
969 	struct nlattr **a = info->attrs;
970 	struct ovs_header *ovs_header = genl_info_userhdr(info);
971 	struct sw_flow *flow = NULL, *new_flow;
972 	struct sw_flow_mask mask;
973 	struct sk_buff *reply;
974 	struct datapath *dp;
975 	struct sw_flow_key *key;
976 	struct sw_flow_actions *acts;
977 	struct sw_flow_match match;
978 	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
979 	int error;
980 	bool log = !a[OVS_FLOW_ATTR_PROBE];
981 
982 	/* Must have key and actions. */
983 	error = -EINVAL;
984 	if (!a[OVS_FLOW_ATTR_KEY]) {
985 		OVS_NLERR(log, "Flow key attr not present in new flow.");
986 		goto error;
987 	}
988 	if (!a[OVS_FLOW_ATTR_ACTIONS]) {
989 		OVS_NLERR(log, "Flow actions attr not present in new flow.");
990 		goto error;
991 	}
992 
993 	/* Most of the time we need to allocate a new flow, do it before
994 	 * locking.
995 	 */
996 	new_flow = ovs_flow_alloc();
997 	if (IS_ERR(new_flow)) {
998 		error = PTR_ERR(new_flow);
999 		goto error;
1000 	}
1001 
1002 	/* Extract key. */
1003 	key = kzalloc(sizeof(*key), GFP_KERNEL);
1004 	if (!key) {
1005 		error = -ENOMEM;
1006 		goto err_kfree_flow;
1007 	}
1008 
1009 	ovs_match_init(&match, key, false, &mask);
1010 	error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
1011 				  a[OVS_FLOW_ATTR_MASK], log);
1012 	if (error)
1013 		goto err_kfree_key;
1014 
1015 	ovs_flow_mask_key(&new_flow->key, key, true, &mask);
1016 
1017 	/* Extract flow identifier. */
1018 	error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
1019 				       key, log);
1020 	if (error)
1021 		goto err_kfree_key;
1022 
1023 	/* Validate actions. */
1024 	error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
1025 				     &new_flow->key, &acts, log);
1026 	if (error) {
1027 		OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
1028 		goto err_kfree_key;
1029 	}
1030 
1031 	reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
1032 					ufid_flags);
1033 	if (IS_ERR(reply)) {
1034 		error = PTR_ERR(reply);
1035 		goto err_kfree_acts;
1036 	}
1037 
1038 	ovs_lock();
1039 	dp = get_dp(net, ovs_header->dp_ifindex);
1040 	if (unlikely(!dp)) {
1041 		error = -ENODEV;
1042 		goto err_unlock_ovs;
1043 	}
1044 
1045 	/* Check if this is a duplicate flow */
1046 	if (ovs_identifier_is_ufid(&new_flow->id))
1047 		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
1048 	if (!flow)
1049 		flow = ovs_flow_tbl_lookup(&dp->table, key);
1050 	if (likely(!flow)) {
1051 		rcu_assign_pointer(new_flow->sf_acts, acts);
1052 
1053 		/* Put flow in bucket. */
1054 		error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
1055 		if (unlikely(error)) {
1056 			acts = NULL;
1057 			goto err_unlock_ovs;
1058 		}
1059 
1060 		if (unlikely(reply)) {
1061 			error = ovs_flow_cmd_fill_info(new_flow,
1062 						       ovs_header->dp_ifindex,
1063 						       reply, info->snd_portid,
1064 						       info->snd_seq, 0,
1065 						       OVS_FLOW_CMD_NEW,
1066 						       ufid_flags);
1067 			BUG_ON(error < 0);
1068 		}
1069 		ovs_unlock();
1070 	} else {
1071 		struct sw_flow_actions *old_acts;
1072 
1073 		/* Bail out if we're not allowed to modify an existing flow.
1074 		 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
1075 		 * because Generic Netlink treats the latter as a dump
1076 		 * request.  We also accept NLM_F_EXCL in case that bug ever
1077 		 * gets fixed.
1078 		 */
1079 		if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
1080 							 | NLM_F_EXCL))) {
1081 			error = -EEXIST;
1082 			goto err_unlock_ovs;
1083 		}
1084 		/* The flow identifier has to be the same for flow updates.
1085 		 * Look for any overlapping flow.
1086 		 */
1087 		if (unlikely(!ovs_flow_cmp(flow, &match))) {
1088 			if (ovs_identifier_is_key(&flow->id))
1089 				flow = ovs_flow_tbl_lookup_exact(&dp->table,
1090 								 &match);
1091 			else /* UFID matches but key is different */
1092 				flow = NULL;
1093 			if (!flow) {
1094 				error = -ENOENT;
1095 				goto err_unlock_ovs;
1096 			}
1097 		}
1098 		/* Update actions. */
1099 		old_acts = ovsl_dereference(flow->sf_acts);
1100 		rcu_assign_pointer(flow->sf_acts, acts);
1101 
1102 		if (unlikely(reply)) {
1103 			error = ovs_flow_cmd_fill_info(flow,
1104 						       ovs_header->dp_ifindex,
1105 						       reply, info->snd_portid,
1106 						       info->snd_seq, 0,
1107 						       OVS_FLOW_CMD_NEW,
1108 						       ufid_flags);
1109 			BUG_ON(error < 0);
1110 		}
1111 		ovs_unlock();
1112 
1113 		ovs_nla_free_flow_actions_rcu(old_acts);
1114 		ovs_flow_free(new_flow, false);
1115 	}
1116 
1117 	if (reply)
1118 		ovs_notify(&dp_flow_genl_family, reply, info);
1119 
1120 	kfree(key);
1121 	return 0;
1122 
1123 err_unlock_ovs:
1124 	ovs_unlock();
1125 	kfree_skb(reply);
1126 err_kfree_acts:
1127 	ovs_nla_free_flow_actions(acts);
1128 err_kfree_key:
1129 	kfree(key);
1130 err_kfree_flow:
1131 	ovs_flow_free(new_flow, false);
1132 error:
1133 	return error;
1134 }
1135 
1136 /* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
1137 static noinline_for_stack
1138 struct sw_flow_actions *get_flow_actions(struct net *net,
1139 					 const struct nlattr *a,
1140 					 const struct sw_flow_key *key,
1141 					 const struct sw_flow_mask *mask,
1142 					 bool log)
1143 {
1144 	struct sw_flow_actions *acts;
1145 	struct sw_flow_key masked_key;
1146 	int error;
1147 
1148 	ovs_flow_mask_key(&masked_key, key, true, mask);
1149 	error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log);
1150 	if (error) {
1151 		OVS_NLERR(log,
1152 			  "Actions may not be safe on all matching packets");
1153 		return ERR_PTR(error);
1154 	}
1155 
1156 	return acts;
1157 }
1158 
1159 /* Factor out match-init and action-copy to avoid
1160  * "Wframe-larger-than=1024" warning. Because mask is only
1161  * used to get actions, we new a function to save some
1162  * stack space.
1163  *
1164  * If there are not key and action attrs, we return 0
1165  * directly. In the case, the caller will also not use the
1166  * match as before. If there is action attr, we try to get
1167  * actions and save them to *acts. Before returning from
1168  * the function, we reset the match->mask pointer. Because
1169  * we should not to return match object with dangling reference
1170  * to mask.
1171  * */
1172 static noinline_for_stack int
1173 ovs_nla_init_match_and_action(struct net *net,
1174 			      struct sw_flow_match *match,
1175 			      struct sw_flow_key *key,
1176 			      struct nlattr **a,
1177 			      struct sw_flow_actions **acts,
1178 			      bool log)
1179 {
1180 	struct sw_flow_mask mask;
1181 	int error = 0;
1182 
1183 	if (a[OVS_FLOW_ATTR_KEY]) {
1184 		ovs_match_init(match, key, true, &mask);
1185 		error = ovs_nla_get_match(net, match, a[OVS_FLOW_ATTR_KEY],
1186 					  a[OVS_FLOW_ATTR_MASK], log);
1187 		if (error)
1188 			goto error;
1189 	}
1190 
1191 	if (a[OVS_FLOW_ATTR_ACTIONS]) {
1192 		if (!a[OVS_FLOW_ATTR_KEY]) {
1193 			OVS_NLERR(log,
1194 				  "Flow key attribute not present in set flow.");
1195 			error = -EINVAL;
1196 			goto error;
1197 		}
1198 
1199 		*acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], key,
1200 					 &mask, log);
1201 		if (IS_ERR(*acts)) {
1202 			error = PTR_ERR(*acts);
1203 			goto error;
1204 		}
1205 	}
1206 
1207 	/* On success, error is 0. */
1208 error:
1209 	match->mask = NULL;
1210 	return error;
1211 }
1212 
1213 static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
1214 {
1215 	struct net *net = sock_net(skb->sk);
1216 	struct nlattr **a = info->attrs;
1217 	struct ovs_header *ovs_header = genl_info_userhdr(info);
1218 	struct sw_flow_key key;
1219 	struct sw_flow *flow;
1220 	struct sk_buff *reply = NULL;
1221 	struct datapath *dp;
1222 	struct sw_flow_actions *old_acts = NULL, *acts = NULL;
1223 	struct sw_flow_match match;
1224 	struct sw_flow_id sfid;
1225 	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1226 	int error = 0;
1227 	bool log = !a[OVS_FLOW_ATTR_PROBE];
1228 	bool ufid_present;
1229 
1230 	ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
1231 	if (!a[OVS_FLOW_ATTR_KEY] && !ufid_present) {
1232 		OVS_NLERR(log,
1233 			  "Flow set message rejected, Key attribute missing.");
1234 		return -EINVAL;
1235 	}
1236 
1237 	error = ovs_nla_init_match_and_action(net, &match, &key, a,
1238 					      &acts, log);
1239 	if (error)
1240 		goto error;
1241 
1242 	if (acts) {
1243 		/* Can allocate before locking if have acts. */
1244 		reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
1245 						ufid_flags);
1246 		if (IS_ERR(reply)) {
1247 			error = PTR_ERR(reply);
1248 			goto err_kfree_acts;
1249 		}
1250 	}
1251 
1252 	ovs_lock();
1253 	dp = get_dp(net, ovs_header->dp_ifindex);
1254 	if (unlikely(!dp)) {
1255 		error = -ENODEV;
1256 		goto err_unlock_ovs;
1257 	}
1258 	/* Check that the flow exists. */
1259 	if (ufid_present)
1260 		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
1261 	else
1262 		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
1263 	if (unlikely(!flow)) {
1264 		error = -ENOENT;
1265 		goto err_unlock_ovs;
1266 	}
1267 
1268 	/* Update actions, if present. */
1269 	if (likely(acts)) {
1270 		old_acts = ovsl_dereference(flow->sf_acts);
1271 		rcu_assign_pointer(flow->sf_acts, acts);
1272 
1273 		if (unlikely(reply)) {
1274 			error = ovs_flow_cmd_fill_info(flow,
1275 						       ovs_header->dp_ifindex,
1276 						       reply, info->snd_portid,
1277 						       info->snd_seq, 0,
1278 						       OVS_FLOW_CMD_SET,
1279 						       ufid_flags);
1280 			BUG_ON(error < 0);
1281 		}
1282 	} else {
1283 		/* Could not alloc without acts before locking. */
1284 		reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
1285 						info, OVS_FLOW_CMD_SET, false,
1286 						ufid_flags);
1287 
1288 		if (IS_ERR(reply)) {
1289 			error = PTR_ERR(reply);
1290 			goto err_unlock_ovs;
1291 		}
1292 	}
1293 
1294 	/* Clear stats. */
1295 	if (a[OVS_FLOW_ATTR_CLEAR])
1296 		ovs_flow_stats_clear(flow);
1297 	ovs_unlock();
1298 
1299 	if (reply)
1300 		ovs_notify(&dp_flow_genl_family, reply, info);
1301 	if (old_acts)
1302 		ovs_nla_free_flow_actions_rcu(old_acts);
1303 
1304 	return 0;
1305 
1306 err_unlock_ovs:
1307 	ovs_unlock();
1308 	kfree_skb(reply);
1309 err_kfree_acts:
1310 	ovs_nla_free_flow_actions(acts);
1311 error:
1312 	return error;
1313 }
1314 
1315 static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
1316 {
1317 	struct nlattr **a = info->attrs;
1318 	struct ovs_header *ovs_header = genl_info_userhdr(info);
1319 	struct net *net = sock_net(skb->sk);
1320 	struct sw_flow_key key;
1321 	struct sk_buff *reply;
1322 	struct sw_flow *flow;
1323 	struct datapath *dp;
1324 	struct sw_flow_match match;
1325 	struct sw_flow_id ufid;
1326 	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1327 	int err = 0;
1328 	bool log = !a[OVS_FLOW_ATTR_PROBE];
1329 	bool ufid_present;
1330 
1331 	ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
1332 	if (a[OVS_FLOW_ATTR_KEY]) {
1333 		ovs_match_init(&match, &key, true, NULL);
1334 		err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL,
1335 					log);
1336 	} else if (!ufid_present) {
1337 		OVS_NLERR(log,
1338 			  "Flow get message rejected, Key attribute missing.");
1339 		err = -EINVAL;
1340 	}
1341 	if (err)
1342 		return err;
1343 
1344 	ovs_lock();
1345 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1346 	if (!dp) {
1347 		err = -ENODEV;
1348 		goto unlock;
1349 	}
1350 
1351 	if (ufid_present)
1352 		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
1353 	else
1354 		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
1355 	if (!flow) {
1356 		err = -ENOENT;
1357 		goto unlock;
1358 	}
1359 
1360 	reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
1361 					OVS_FLOW_CMD_GET, true, ufid_flags);
1362 	if (IS_ERR(reply)) {
1363 		err = PTR_ERR(reply);
1364 		goto unlock;
1365 	}
1366 
1367 	ovs_unlock();
1368 	return genlmsg_reply(reply, info);
1369 unlock:
1370 	ovs_unlock();
1371 	return err;
1372 }
1373 
1374 static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1375 {
1376 	struct nlattr **a = info->attrs;
1377 	struct ovs_header *ovs_header = genl_info_userhdr(info);
1378 	struct net *net = sock_net(skb->sk);
1379 	struct sw_flow_key key;
1380 	struct sk_buff *reply;
1381 	struct sw_flow *flow = NULL;
1382 	struct datapath *dp;
1383 	struct sw_flow_match match;
1384 	struct sw_flow_id ufid;
1385 	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1386 	int err;
1387 	bool log = !a[OVS_FLOW_ATTR_PROBE];
1388 	bool ufid_present;
1389 
1390 	ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
1391 	if (a[OVS_FLOW_ATTR_KEY]) {
1392 		ovs_match_init(&match, &key, true, NULL);
1393 		err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
1394 					NULL, log);
1395 		if (unlikely(err))
1396 			return err;
1397 	}
1398 
1399 	ovs_lock();
1400 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1401 	if (unlikely(!dp)) {
1402 		err = -ENODEV;
1403 		goto unlock;
1404 	}
1405 
1406 	if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
1407 		err = ovs_flow_tbl_flush(&dp->table);
1408 		goto unlock;
1409 	}
1410 
1411 	if (ufid_present)
1412 		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
1413 	else
1414 		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
1415 	if (unlikely(!flow)) {
1416 		err = -ENOENT;
1417 		goto unlock;
1418 	}
1419 
1420 	ovs_flow_tbl_remove(&dp->table, flow);
1421 	ovs_unlock();
1422 
1423 	reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
1424 					&flow->id, info, false, ufid_flags);
1425 	if (likely(reply)) {
1426 		if (!IS_ERR(reply)) {
1427 			rcu_read_lock();	/*To keep RCU checker happy. */
1428 			err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
1429 						     reply, info->snd_portid,
1430 						     info->snd_seq, 0,
1431 						     OVS_FLOW_CMD_DEL,
1432 						     ufid_flags);
1433 			rcu_read_unlock();
1434 			if (WARN_ON_ONCE(err < 0)) {
1435 				kfree_skb(reply);
1436 				goto out_free;
1437 			}
1438 
1439 			ovs_notify(&dp_flow_genl_family, reply, info);
1440 		} else {
1441 			netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0,
1442 					PTR_ERR(reply));
1443 		}
1444 	}
1445 
1446 out_free:
1447 	ovs_flow_free(flow, true);
1448 	return 0;
1449 unlock:
1450 	ovs_unlock();
1451 	return err;
1452 }
1453 
1454 static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1455 {
1456 	struct nlattr *a[__OVS_FLOW_ATTR_MAX];
1457 	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
1458 	struct table_instance *ti;
1459 	struct datapath *dp;
1460 	u32 ufid_flags;
1461 	int err;
1462 
1463 	err = genlmsg_parse_deprecated(cb->nlh, &dp_flow_genl_family, a,
1464 				       OVS_FLOW_ATTR_MAX, flow_policy, NULL);
1465 	if (err)
1466 		return err;
1467 	ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1468 
1469 	rcu_read_lock();
1470 	dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
1471 	if (!dp) {
1472 		rcu_read_unlock();
1473 		return -ENODEV;
1474 	}
1475 
1476 	ti = rcu_dereference(dp->table.ti);
1477 	for (;;) {
1478 		struct sw_flow *flow;
1479 		u32 bucket, obj;
1480 
1481 		bucket = cb->args[0];
1482 		obj = cb->args[1];
1483 		flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
1484 		if (!flow)
1485 			break;
1486 
1487 		if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
1488 					   NETLINK_CB(cb->skb).portid,
1489 					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
1490 					   OVS_FLOW_CMD_GET, ufid_flags) < 0)
1491 			break;
1492 
1493 		cb->args[0] = bucket;
1494 		cb->args[1] = obj;
1495 	}
1496 	rcu_read_unlock();
1497 	return skb->len;
1498 }
1499 
1500 static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
1501 	[OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
1502 	[OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
1503 	[OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
1504 	[OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
1505 	[OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
1506 	[OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 },
1507 	[OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
1508 };
1509 
1510 static const struct genl_small_ops dp_flow_genl_ops[] = {
1511 	{ .cmd = OVS_FLOW_CMD_NEW,
1512 	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1513 	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1514 	  .doit = ovs_flow_cmd_new
1515 	},
1516 	{ .cmd = OVS_FLOW_CMD_DEL,
1517 	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1518 	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1519 	  .doit = ovs_flow_cmd_del
1520 	},
1521 	{ .cmd = OVS_FLOW_CMD_GET,
1522 	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1523 	  .flags = 0,		    /* OK for unprivileged users. */
1524 	  .doit = ovs_flow_cmd_get,
1525 	  .dumpit = ovs_flow_cmd_dump
1526 	},
1527 	{ .cmd = OVS_FLOW_CMD_SET,
1528 	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1529 	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1530 	  .doit = ovs_flow_cmd_set,
1531 	},
1532 };
1533 
1534 static struct genl_family dp_flow_genl_family __ro_after_init = {
1535 	.hdrsize = sizeof(struct ovs_header),
1536 	.name = OVS_FLOW_FAMILY,
1537 	.version = OVS_FLOW_VERSION,
1538 	.maxattr = OVS_FLOW_ATTR_MAX,
1539 	.policy = flow_policy,
1540 	.netnsok = true,
1541 	.parallel_ops = true,
1542 	.small_ops = dp_flow_genl_ops,
1543 	.n_small_ops = ARRAY_SIZE(dp_flow_genl_ops),
1544 	.resv_start_op = OVS_FLOW_CMD_SET + 1,
1545 	.mcgrps = &ovs_dp_flow_multicast_group,
1546 	.n_mcgrps = 1,
1547 	.module = THIS_MODULE,
1548 };
1549 
1550 static size_t ovs_dp_cmd_msg_size(void)
1551 {
1552 	size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
1553 
1554 	msgsize += nla_total_size(IFNAMSIZ);
1555 	msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
1556 	msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
1557 	msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
1558 	msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_MASKS_CACHE_SIZE */
1559 	msgsize += nla_total_size(sizeof(u32) * nr_cpu_ids); /* OVS_DP_ATTR_PER_CPU_PIDS */
1560 
1561 	return msgsize;
1562 }
1563 
1564 /* Called with ovs_mutex. */
1565 static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
1566 				u32 portid, u32 seq, u32 flags, u8 cmd)
1567 {
1568 	struct ovs_header *ovs_header;
1569 	struct ovs_dp_stats dp_stats;
1570 	struct ovs_dp_megaflow_stats dp_megaflow_stats;
1571 	struct dp_nlsk_pids *pids = ovsl_dereference(dp->upcall_portids);
1572 	int err, pids_len;
1573 
1574 	ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
1575 				 flags, cmd);
1576 	if (!ovs_header)
1577 		goto error;
1578 
1579 	ovs_header->dp_ifindex = get_dpifindex(dp);
1580 
1581 	err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
1582 	if (err)
1583 		goto nla_put_failure;
1584 
1585 	get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
1586 	if (nla_put_64bit(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
1587 			  &dp_stats, OVS_DP_ATTR_PAD))
1588 		goto nla_put_failure;
1589 
1590 	if (nla_put_64bit(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
1591 			  sizeof(struct ovs_dp_megaflow_stats),
1592 			  &dp_megaflow_stats, OVS_DP_ATTR_PAD))
1593 		goto nla_put_failure;
1594 
1595 	if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
1596 		goto nla_put_failure;
1597 
1598 	if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
1599 			ovs_flow_tbl_masks_cache_size(&dp->table)))
1600 		goto nla_put_failure;
1601 
1602 	if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU && pids) {
1603 		pids_len = min(pids->n_pids, nr_cpu_ids) * sizeof(u32);
1604 		if (nla_put(skb, OVS_DP_ATTR_PER_CPU_PIDS, pids_len, &pids->pids))
1605 			goto nla_put_failure;
1606 	}
1607 
1608 	genlmsg_end(skb, ovs_header);
1609 	return 0;
1610 
1611 nla_put_failure:
1612 	genlmsg_cancel(skb, ovs_header);
1613 error:
1614 	return -EMSGSIZE;
1615 }
1616 
1617 static struct sk_buff *ovs_dp_cmd_alloc_info(void)
1618 {
1619 	return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
1620 }
1621 
1622 /* Called with rcu_read_lock or ovs_mutex. */
1623 static struct datapath *lookup_datapath(struct net *net,
1624 					const struct ovs_header *ovs_header,
1625 					struct nlattr *a[OVS_DP_ATTR_MAX + 1])
1626 {
1627 	struct datapath *dp;
1628 
1629 	if (!a[OVS_DP_ATTR_NAME])
1630 		dp = get_dp(net, ovs_header->dp_ifindex);
1631 	else {
1632 		struct vport *vport;
1633 
1634 		vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
1635 		dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
1636 	}
1637 	return dp ? dp : ERR_PTR(-ENODEV);
1638 }
1639 
1640 static void ovs_dp_reset_user_features(struct sk_buff *skb,
1641 				       struct genl_info *info)
1642 {
1643 	struct datapath *dp;
1644 
1645 	dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
1646 			     info->attrs);
1647 	if (IS_ERR(dp))
1648 		return;
1649 
1650 	pr_warn("%s: Dropping previously announced user features\n",
1651 		ovs_dp_name(dp));
1652 	dp->user_features = 0;
1653 }
1654 
1655 static int ovs_dp_set_upcall_portids(struct datapath *dp,
1656 			      const struct nlattr *ids)
1657 {
1658 	struct dp_nlsk_pids *old, *dp_nlsk_pids;
1659 
1660 	if (!nla_len(ids) || nla_len(ids) % sizeof(u32))
1661 		return -EINVAL;
1662 
1663 	old = ovsl_dereference(dp->upcall_portids);
1664 
1665 	dp_nlsk_pids = kmalloc(sizeof(*dp_nlsk_pids) + nla_len(ids),
1666 			       GFP_KERNEL);
1667 	if (!dp_nlsk_pids)
1668 		return -ENOMEM;
1669 
1670 	dp_nlsk_pids->n_pids = nla_len(ids) / sizeof(u32);
1671 	nla_memcpy(dp_nlsk_pids->pids, ids, nla_len(ids));
1672 
1673 	rcu_assign_pointer(dp->upcall_portids, dp_nlsk_pids);
1674 
1675 	kfree_rcu(old, rcu);
1676 
1677 	return 0;
1678 }
1679 
1680 u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id)
1681 {
1682 	struct dp_nlsk_pids *dp_nlsk_pids;
1683 
1684 	dp_nlsk_pids = rcu_dereference(dp->upcall_portids);
1685 
1686 	if (dp_nlsk_pids) {
1687 		if (cpu_id < dp_nlsk_pids->n_pids) {
1688 			return dp_nlsk_pids->pids[cpu_id];
1689 		} else if (dp_nlsk_pids->n_pids > 0 &&
1690 			   cpu_id >= dp_nlsk_pids->n_pids) {
1691 			/* If the number of netlink PIDs is mismatched with
1692 			 * the number of CPUs as seen by the kernel, log this
1693 			 * and send the upcall to an arbitrary socket (0) in
1694 			 * order to not drop packets
1695 			 */
1696 			pr_info_ratelimited("cpu_id mismatch with handler threads");
1697 			return dp_nlsk_pids->pids[cpu_id %
1698 						  dp_nlsk_pids->n_pids];
1699 		} else {
1700 			return 0;
1701 		}
1702 	} else {
1703 		return 0;
1704 	}
1705 }
1706 
1707 static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
1708 {
1709 	u32 user_features = 0, old_features = dp->user_features;
1710 	int err;
1711 
1712 	if (a[OVS_DP_ATTR_USER_FEATURES]) {
1713 		user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
1714 
1715 		if (user_features & ~(OVS_DP_F_VPORT_PIDS |
1716 				      OVS_DP_F_UNALIGNED |
1717 				      OVS_DP_F_TC_RECIRC_SHARING |
1718 				      OVS_DP_F_DISPATCH_UPCALL_PER_CPU))
1719 			return -EOPNOTSUPP;
1720 
1721 #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
1722 		if (user_features & OVS_DP_F_TC_RECIRC_SHARING)
1723 			return -EOPNOTSUPP;
1724 #endif
1725 	}
1726 
1727 	if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
1728 		int err;
1729 		u32 cache_size;
1730 
1731 		cache_size = nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
1732 		err = ovs_flow_tbl_masks_cache_resize(&dp->table, cache_size);
1733 		if (err)
1734 			return err;
1735 	}
1736 
1737 	dp->user_features = user_features;
1738 
1739 	if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU &&
1740 	    a[OVS_DP_ATTR_PER_CPU_PIDS]) {
1741 		/* Upcall Netlink Port IDs have been updated */
1742 		err = ovs_dp_set_upcall_portids(dp,
1743 						a[OVS_DP_ATTR_PER_CPU_PIDS]);
1744 		if (err)
1745 			return err;
1746 	}
1747 
1748 	if ((dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
1749 	    !(old_features & OVS_DP_F_TC_RECIRC_SHARING))
1750 		tc_skb_ext_tc_enable();
1751 	else if (!(dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
1752 		 (old_features & OVS_DP_F_TC_RECIRC_SHARING))
1753 		tc_skb_ext_tc_disable();
1754 
1755 	return 0;
1756 }
1757 
1758 static int ovs_dp_stats_init(struct datapath *dp)
1759 {
1760 	dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
1761 	if (!dp->stats_percpu)
1762 		return -ENOMEM;
1763 
1764 	return 0;
1765 }
1766 
1767 static int ovs_dp_vport_init(struct datapath *dp)
1768 {
1769 	int i;
1770 
1771 	dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
1772 				  sizeof(struct hlist_head),
1773 				  GFP_KERNEL);
1774 	if (!dp->ports)
1775 		return -ENOMEM;
1776 
1777 	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1778 		INIT_HLIST_HEAD(&dp->ports[i]);
1779 
1780 	return 0;
1781 }
1782 
1783 static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1784 {
1785 	struct nlattr **a = info->attrs;
1786 	struct vport_parms parms;
1787 	struct sk_buff *reply;
1788 	struct datapath *dp;
1789 	struct vport *vport;
1790 	struct ovs_net *ovs_net;
1791 	int err;
1792 
1793 	err = -EINVAL;
1794 	if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
1795 		goto err;
1796 
1797 	reply = ovs_dp_cmd_alloc_info();
1798 	if (!reply)
1799 		return -ENOMEM;
1800 
1801 	err = -ENOMEM;
1802 	dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1803 	if (dp == NULL)
1804 		goto err_destroy_reply;
1805 
1806 	ovs_dp_set_net(dp, sock_net(skb->sk));
1807 
1808 	/* Allocate table. */
1809 	err = ovs_flow_tbl_init(&dp->table);
1810 	if (err)
1811 		goto err_destroy_dp;
1812 
1813 	err = ovs_dp_stats_init(dp);
1814 	if (err)
1815 		goto err_destroy_table;
1816 
1817 	err = ovs_dp_vport_init(dp);
1818 	if (err)
1819 		goto err_destroy_stats;
1820 
1821 	err = ovs_meters_init(dp);
1822 	if (err)
1823 		goto err_destroy_ports;
1824 
1825 	/* Set up our datapath device. */
1826 	parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
1827 	parms.type = OVS_VPORT_TYPE_INTERNAL;
1828 	parms.options = NULL;
1829 	parms.dp = dp;
1830 	parms.port_no = OVSP_LOCAL;
1831 	parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
1832 	parms.desired_ifindex = a[OVS_DP_ATTR_IFINDEX]
1833 		? nla_get_s32(a[OVS_DP_ATTR_IFINDEX]) : 0;
1834 
1835 	/* So far only local changes have been made, now need the lock. */
1836 	ovs_lock();
1837 
1838 	err = ovs_dp_change(dp, a);
1839 	if (err)
1840 		goto err_unlock_and_destroy_meters;
1841 
1842 	vport = new_vport(&parms);
1843 	if (IS_ERR(vport)) {
1844 		err = PTR_ERR(vport);
1845 		if (err == -EBUSY)
1846 			err = -EEXIST;
1847 
1848 		if (err == -EEXIST) {
1849 			/* An outdated user space instance that does not understand
1850 			 * the concept of user_features has attempted to create a new
1851 			 * datapath and is likely to reuse it. Drop all user features.
1852 			 */
1853 			if (info->genlhdr->version < OVS_DP_VER_FEATURES)
1854 				ovs_dp_reset_user_features(skb, info);
1855 		}
1856 
1857 		goto err_destroy_portids;
1858 	}
1859 
1860 	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1861 				   info->snd_seq, 0, OVS_DP_CMD_NEW);
1862 	BUG_ON(err < 0);
1863 
1864 	ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
1865 	list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
1866 
1867 	ovs_unlock();
1868 
1869 	ovs_notify(&dp_datapath_genl_family, reply, info);
1870 	return 0;
1871 
1872 err_destroy_portids:
1873 	kfree(rcu_dereference_raw(dp->upcall_portids));
1874 err_unlock_and_destroy_meters:
1875 	ovs_unlock();
1876 	ovs_meters_exit(dp);
1877 err_destroy_ports:
1878 	kfree(dp->ports);
1879 err_destroy_stats:
1880 	free_percpu(dp->stats_percpu);
1881 err_destroy_table:
1882 	ovs_flow_tbl_destroy(&dp->table);
1883 err_destroy_dp:
1884 	kfree(dp);
1885 err_destroy_reply:
1886 	kfree_skb(reply);
1887 err:
1888 	return err;
1889 }
1890 
1891 /* Called with ovs_mutex. */
1892 static void __dp_destroy(struct datapath *dp)
1893 {
1894 	struct flow_table *table = &dp->table;
1895 	int i;
1896 
1897 	if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
1898 		tc_skb_ext_tc_disable();
1899 
1900 	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1901 		struct vport *vport;
1902 		struct hlist_node *n;
1903 
1904 		hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
1905 			if (vport->port_no != OVSP_LOCAL)
1906 				ovs_dp_detach_port(vport);
1907 	}
1908 
1909 	list_del_rcu(&dp->list_node);
1910 
1911 	/* OVSP_LOCAL is datapath internal port. We need to make sure that
1912 	 * all ports in datapath are destroyed first before freeing datapath.
1913 	 */
1914 	ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
1915 
1916 	/* Flush sw_flow in the tables. RCU cb only releases resource
1917 	 * such as dp, ports and tables. That may avoid some issues
1918 	 * such as RCU usage warning.
1919 	 */
1920 	table_instance_flow_flush(table, ovsl_dereference(table->ti),
1921 				  ovsl_dereference(table->ufid_ti));
1922 
1923 	/* RCU destroy the ports, meters and flow tables. */
1924 	call_rcu(&dp->rcu, destroy_dp_rcu);
1925 }
1926 
1927 static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1928 {
1929 	struct sk_buff *reply;
1930 	struct datapath *dp;
1931 	int err;
1932 
1933 	reply = ovs_dp_cmd_alloc_info();
1934 	if (!reply)
1935 		return -ENOMEM;
1936 
1937 	ovs_lock();
1938 	dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
1939 			     info->attrs);
1940 	err = PTR_ERR(dp);
1941 	if (IS_ERR(dp))
1942 		goto err_unlock_free;
1943 
1944 	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1945 				   info->snd_seq, 0, OVS_DP_CMD_DEL);
1946 	BUG_ON(err < 0);
1947 
1948 	__dp_destroy(dp);
1949 	ovs_unlock();
1950 
1951 	ovs_notify(&dp_datapath_genl_family, reply, info);
1952 
1953 	return 0;
1954 
1955 err_unlock_free:
1956 	ovs_unlock();
1957 	kfree_skb(reply);
1958 	return err;
1959 }
1960 
1961 static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1962 {
1963 	struct sk_buff *reply;
1964 	struct datapath *dp;
1965 	int err;
1966 
1967 	reply = ovs_dp_cmd_alloc_info();
1968 	if (!reply)
1969 		return -ENOMEM;
1970 
1971 	ovs_lock();
1972 	dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
1973 			     info->attrs);
1974 	err = PTR_ERR(dp);
1975 	if (IS_ERR(dp))
1976 		goto err_unlock_free;
1977 
1978 	err = ovs_dp_change(dp, info->attrs);
1979 	if (err)
1980 		goto err_unlock_free;
1981 
1982 	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1983 				   info->snd_seq, 0, OVS_DP_CMD_SET);
1984 	BUG_ON(err < 0);
1985 
1986 	ovs_unlock();
1987 	ovs_notify(&dp_datapath_genl_family, reply, info);
1988 
1989 	return 0;
1990 
1991 err_unlock_free:
1992 	ovs_unlock();
1993 	kfree_skb(reply);
1994 	return err;
1995 }
1996 
1997 static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1998 {
1999 	struct sk_buff *reply;
2000 	struct datapath *dp;
2001 	int err;
2002 
2003 	reply = ovs_dp_cmd_alloc_info();
2004 	if (!reply)
2005 		return -ENOMEM;
2006 
2007 	ovs_lock();
2008 	dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
2009 			     info->attrs);
2010 	if (IS_ERR(dp)) {
2011 		err = PTR_ERR(dp);
2012 		goto err_unlock_free;
2013 	}
2014 	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
2015 				   info->snd_seq, 0, OVS_DP_CMD_GET);
2016 	BUG_ON(err < 0);
2017 	ovs_unlock();
2018 
2019 	return genlmsg_reply(reply, info);
2020 
2021 err_unlock_free:
2022 	ovs_unlock();
2023 	kfree_skb(reply);
2024 	return err;
2025 }
2026 
2027 static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
2028 {
2029 	struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
2030 	struct datapath *dp;
2031 	int skip = cb->args[0];
2032 	int i = 0;
2033 
2034 	ovs_lock();
2035 	list_for_each_entry(dp, &ovs_net->dps, list_node) {
2036 		if (i >= skip &&
2037 		    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
2038 					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
2039 					 OVS_DP_CMD_GET) < 0)
2040 			break;
2041 		i++;
2042 	}
2043 	ovs_unlock();
2044 
2045 	cb->args[0] = i;
2046 
2047 	return skb->len;
2048 }
2049 
2050 static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
2051 	[OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
2052 	[OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
2053 	[OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
2054 	[OVS_DP_ATTR_MASKS_CACHE_SIZE] =  NLA_POLICY_RANGE(NLA_U32, 0,
2055 		PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)),
2056 	[OVS_DP_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 0),
2057 };
2058 
2059 static const struct genl_small_ops dp_datapath_genl_ops[] = {
2060 	{ .cmd = OVS_DP_CMD_NEW,
2061 	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2062 	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2063 	  .doit = ovs_dp_cmd_new
2064 	},
2065 	{ .cmd = OVS_DP_CMD_DEL,
2066 	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2067 	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2068 	  .doit = ovs_dp_cmd_del
2069 	},
2070 	{ .cmd = OVS_DP_CMD_GET,
2071 	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2072 	  .flags = 0,		    /* OK for unprivileged users. */
2073 	  .doit = ovs_dp_cmd_get,
2074 	  .dumpit = ovs_dp_cmd_dump
2075 	},
2076 	{ .cmd = OVS_DP_CMD_SET,
2077 	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2078 	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2079 	  .doit = ovs_dp_cmd_set,
2080 	},
2081 };
2082 
2083 static struct genl_family dp_datapath_genl_family __ro_after_init = {
2084 	.hdrsize = sizeof(struct ovs_header),
2085 	.name = OVS_DATAPATH_FAMILY,
2086 	.version = OVS_DATAPATH_VERSION,
2087 	.maxattr = OVS_DP_ATTR_MAX,
2088 	.policy = datapath_policy,
2089 	.netnsok = true,
2090 	.parallel_ops = true,
2091 	.small_ops = dp_datapath_genl_ops,
2092 	.n_small_ops = ARRAY_SIZE(dp_datapath_genl_ops),
2093 	.resv_start_op = OVS_DP_CMD_SET + 1,
2094 	.mcgrps = &ovs_dp_datapath_multicast_group,
2095 	.n_mcgrps = 1,
2096 	.module = THIS_MODULE,
2097 };
2098 
2099 /* Called with ovs_mutex or RCU read lock. */
2100 static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
2101 				   struct net *net, u32 portid, u32 seq,
2102 				   u32 flags, u8 cmd, gfp_t gfp)
2103 {
2104 	struct ovs_header *ovs_header;
2105 	struct ovs_vport_stats vport_stats;
2106 	int err;
2107 
2108 	ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
2109 				 flags, cmd);
2110 	if (!ovs_header)
2111 		return -EMSGSIZE;
2112 
2113 	ovs_header->dp_ifindex = get_dpifindex(vport->dp);
2114 
2115 	if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
2116 	    nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
2117 	    nla_put_string(skb, OVS_VPORT_ATTR_NAME,
2118 			   ovs_vport_name(vport)) ||
2119 	    nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex))
2120 		goto nla_put_failure;
2121 
2122 	if (!net_eq(net, dev_net(vport->dev))) {
2123 		int id = peernet2id_alloc(net, dev_net(vport->dev), gfp);
2124 
2125 		if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
2126 			goto nla_put_failure;
2127 	}
2128 
2129 	ovs_vport_get_stats(vport, &vport_stats);
2130 	if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
2131 			  sizeof(struct ovs_vport_stats), &vport_stats,
2132 			  OVS_VPORT_ATTR_PAD))
2133 		goto nla_put_failure;
2134 
2135 	if (ovs_vport_get_upcall_stats(vport, skb))
2136 		goto nla_put_failure;
2137 
2138 	if (ovs_vport_get_upcall_portids(vport, skb))
2139 		goto nla_put_failure;
2140 
2141 	err = ovs_vport_get_options(vport, skb);
2142 	if (err == -EMSGSIZE)
2143 		goto error;
2144 
2145 	genlmsg_end(skb, ovs_header);
2146 	return 0;
2147 
2148 nla_put_failure:
2149 	err = -EMSGSIZE;
2150 error:
2151 	genlmsg_cancel(skb, ovs_header);
2152 	return err;
2153 }
2154 
2155 static struct sk_buff *ovs_vport_cmd_alloc_info(void)
2156 {
2157 	return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2158 }
2159 
2160 /* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
2161 struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
2162 					 u32 portid, u32 seq, u8 cmd)
2163 {
2164 	struct sk_buff *skb;
2165 	int retval;
2166 
2167 	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2168 	if (!skb)
2169 		return ERR_PTR(-ENOMEM);
2170 
2171 	retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd,
2172 					 GFP_KERNEL);
2173 	BUG_ON(retval < 0);
2174 
2175 	return skb;
2176 }
2177 
2178 /* Called with ovs_mutex or RCU read lock. */
2179 static struct vport *lookup_vport(struct net *net,
2180 				  const struct ovs_header *ovs_header,
2181 				  struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
2182 {
2183 	struct datapath *dp;
2184 	struct vport *vport;
2185 
2186 	if (a[OVS_VPORT_ATTR_IFINDEX])
2187 		return ERR_PTR(-EOPNOTSUPP);
2188 	if (a[OVS_VPORT_ATTR_NAME]) {
2189 		vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
2190 		if (!vport)
2191 			return ERR_PTR(-ENODEV);
2192 		if (ovs_header->dp_ifindex &&
2193 		    ovs_header->dp_ifindex != get_dpifindex(vport->dp))
2194 			return ERR_PTR(-ENODEV);
2195 		return vport;
2196 	} else if (a[OVS_VPORT_ATTR_PORT_NO]) {
2197 		u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
2198 
2199 		if (port_no >= DP_MAX_PORTS)
2200 			return ERR_PTR(-EFBIG);
2201 
2202 		dp = get_dp(net, ovs_header->dp_ifindex);
2203 		if (!dp)
2204 			return ERR_PTR(-ENODEV);
2205 
2206 		vport = ovs_vport_ovsl_rcu(dp, port_no);
2207 		if (!vport)
2208 			return ERR_PTR(-ENODEV);
2209 		return vport;
2210 	} else
2211 		return ERR_PTR(-EINVAL);
2212 
2213 }
2214 
2215 static unsigned int ovs_get_max_headroom(struct datapath *dp)
2216 {
2217 	unsigned int dev_headroom, max_headroom = 0;
2218 	struct net_device *dev;
2219 	struct vport *vport;
2220 	int i;
2221 
2222 	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
2223 		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
2224 					 lockdep_ovsl_is_held()) {
2225 			dev = vport->dev;
2226 			dev_headroom = netdev_get_fwd_headroom(dev);
2227 			if (dev_headroom > max_headroom)
2228 				max_headroom = dev_headroom;
2229 		}
2230 	}
2231 
2232 	return max_headroom;
2233 }
2234 
2235 /* Called with ovs_mutex */
2236 static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom)
2237 {
2238 	struct vport *vport;
2239 	int i;
2240 
2241 	dp->max_headroom = new_headroom;
2242 	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
2243 		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
2244 					 lockdep_ovsl_is_held())
2245 			netdev_set_rx_headroom(vport->dev, new_headroom);
2246 	}
2247 }
2248 
/*
 * OVS_VPORT_CMD_NEW handler: create a port on an existing datapath.
 *
 * The request must carry a name, a type and the upcall port ids.  A port
 * number is optional; when it is absent (or zero) the lowest free number
 * starting at 1 is used.  OVS_VPORT_ATTR_IFINDEX is only accepted for
 * internal ports.
 *
 * On success the description of the new port is sent through ovs_notify()
 * and 0 is returned.  Errors visible here: -EINVAL (missing attribute),
 * -EOPNOTSUPP (ifindex on a non-internal port), -EFBIG (port number out of
 * range or no free number left), -ENOMEM (reply allocation), -ENODEV (no such
 * datapath), -EBUSY (requested port number already in use), or whatever
 * new_vport() reports.
 */
static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = genl_info_userhdr(info);
	struct vport_parms parms;
	struct sk_buff *reply;
	struct vport *vport;
	struct datapath *dp;
	unsigned int new_headroom;
	u32 port_no;
	int err;

	if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
	    !a[OVS_VPORT_ATTR_UPCALL_PID])
		return -EINVAL;

	parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);

	if (a[OVS_VPORT_ATTR_IFINDEX] && parms.type != OVS_VPORT_TYPE_INTERNAL)
		return -EOPNOTSUPP;

	/* Zero means "pick a port number for me". */
	port_no = a[OVS_VPORT_ATTR_PORT_NO]
		? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
	if (port_no >= DP_MAX_PORTS)
		return -EFBIG;

	/* Allocate the reply before taking the lock. */
	reply = ovs_vport_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
restart:
	/*
	 * Everything from here on is redone when new_vport() asks for a retry.
	 * port_no is not reset on that path, so a number chosen by the search
	 * loop below is handled like an explicitly requested one the second
	 * time around.
	 */
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	err = -ENODEV;
	if (!dp)
		goto exit_unlock_free;

	if (port_no) {
		/* Explicit port number: it must still be free. */
		vport = ovs_vport_ovsl(dp, port_no);
		err = -EBUSY;
		if (vport)
			goto exit_unlock_free;
	} else {
		/* Search for the lowest unused port number, starting at 1. */
		for (port_no = 1; ; port_no++) {
			if (port_no >= DP_MAX_PORTS) {
				err = -EFBIG;
				goto exit_unlock_free;
			}
			vport = ovs_vport_ovsl(dp, port_no);
			if (!vport)
				break;
		}
	}

	parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
	parms.options = a[OVS_VPORT_ATTR_OPTIONS];
	parms.dp = dp;
	parms.port_no = port_no;
	parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
	parms.desired_ifindex = a[OVS_VPORT_ATTR_IFINDEX]
		? nla_get_s32(a[OVS_VPORT_ATTR_IFINDEX]) : 0;

	vport = new_vport(&parms);
	err = PTR_ERR(vport);
	if (IS_ERR(vport)) {
		/*
		 * NOTE(review): -EAGAIN presumably means the state may have
		 * changed while new_vport() ran (e.g. the lock was dropped) -
		 * confirm against new_vport().
		 */
		if (err == -EAGAIN)
			goto restart;
		goto exit_unlock_free;
	}

	/* The result is only checked by the BUG_ON() further down. */
	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
				      info->snd_portid, info->snd_seq, 0,
				      OVS_VPORT_CMD_NEW, GFP_KERNEL);

	new_headroom = netdev_get_fwd_headroom(vport->dev);

	/*
	 * Either the new device raises the datapath-wide headroom, which is
	 * then propagated to every port, or it simply adopts the current one.
	 */
	if (new_headroom > dp->max_headroom)
		ovs_update_headroom(dp, new_headroom);
	else
		netdev_set_rx_headroom(vport->dev, dp->max_headroom);

	BUG_ON(err < 0);
	ovs_unlock();

	ovs_notify(&dp_vport_genl_family, reply, info);
	return 0;

exit_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}
2341 
2342 static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
2343 {
2344 	struct nlattr **a = info->attrs;
2345 	struct sk_buff *reply;
2346 	struct vport *vport;
2347 	int err;
2348 
2349 	reply = ovs_vport_cmd_alloc_info();
2350 	if (!reply)
2351 		return -ENOMEM;
2352 
2353 	ovs_lock();
2354 	vport = lookup_vport(sock_net(skb->sk), genl_info_userhdr(info), a);
2355 	err = PTR_ERR(vport);
2356 	if (IS_ERR(vport))
2357 		goto exit_unlock_free;
2358 
2359 	if (a[OVS_VPORT_ATTR_TYPE] &&
2360 	    nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
2361 		err = -EINVAL;
2362 		goto exit_unlock_free;
2363 	}
2364 
2365 	if (a[OVS_VPORT_ATTR_OPTIONS]) {
2366 		err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
2367 		if (err)
2368 			goto exit_unlock_free;
2369 	}
2370 
2371 
2372 	if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
2373 		struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];
2374 
2375 		err = ovs_vport_set_upcall_portids(vport, ids);
2376 		if (err)
2377 			goto exit_unlock_free;
2378 	}
2379 
2380 	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2381 				      info->snd_portid, info->snd_seq, 0,
2382 				      OVS_VPORT_CMD_SET, GFP_KERNEL);
2383 	BUG_ON(err < 0);
2384 
2385 	ovs_unlock();
2386 	ovs_notify(&dp_vport_genl_family, reply, info);
2387 	return 0;
2388 
2389 exit_unlock_free:
2390 	ovs_unlock();
2391 	kfree_skb(reply);
2392 	return err;
2393 }
2394 
2395 static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
2396 {
2397 	bool update_headroom = false;
2398 	struct nlattr **a = info->attrs;
2399 	struct sk_buff *reply;
2400 	struct datapath *dp;
2401 	struct vport *vport;
2402 	unsigned int new_headroom;
2403 	int err;
2404 
2405 	reply = ovs_vport_cmd_alloc_info();
2406 	if (!reply)
2407 		return -ENOMEM;
2408 
2409 	ovs_lock();
2410 	vport = lookup_vport(sock_net(skb->sk), genl_info_userhdr(info), a);
2411 	err = PTR_ERR(vport);
2412 	if (IS_ERR(vport))
2413 		goto exit_unlock_free;
2414 
2415 	if (vport->port_no == OVSP_LOCAL) {
2416 		err = -EINVAL;
2417 		goto exit_unlock_free;
2418 	}
2419 
2420 	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2421 				      info->snd_portid, info->snd_seq, 0,
2422 				      OVS_VPORT_CMD_DEL, GFP_KERNEL);
2423 	BUG_ON(err < 0);
2424 
2425 	/* the vport deletion may trigger dp headroom update */
2426 	dp = vport->dp;
2427 	if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom)
2428 		update_headroom = true;
2429 
2430 	netdev_reset_rx_headroom(vport->dev);
2431 	ovs_dp_detach_port(vport);
2432 
2433 	if (update_headroom) {
2434 		new_headroom = ovs_get_max_headroom(dp);
2435 
2436 		if (new_headroom < dp->max_headroom)
2437 			ovs_update_headroom(dp, new_headroom);
2438 	}
2439 	ovs_unlock();
2440 
2441 	ovs_notify(&dp_vport_genl_family, reply, info);
2442 	return 0;
2443 
2444 exit_unlock_free:
2445 	ovs_unlock();
2446 	kfree_skb(reply);
2447 	return err;
2448 }
2449 
2450 static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
2451 {
2452 	struct nlattr **a = info->attrs;
2453 	struct ovs_header *ovs_header = genl_info_userhdr(info);
2454 	struct sk_buff *reply;
2455 	struct vport *vport;
2456 	int err;
2457 
2458 	reply = ovs_vport_cmd_alloc_info();
2459 	if (!reply)
2460 		return -ENOMEM;
2461 
2462 	rcu_read_lock();
2463 	vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
2464 	err = PTR_ERR(vport);
2465 	if (IS_ERR(vport))
2466 		goto exit_unlock_free;
2467 	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2468 				      info->snd_portid, info->snd_seq, 0,
2469 				      OVS_VPORT_CMD_GET, GFP_ATOMIC);
2470 	BUG_ON(err < 0);
2471 	rcu_read_unlock();
2472 
2473 	return genlmsg_reply(reply, info);
2474 
2475 exit_unlock_free:
2476 	rcu_read_unlock();
2477 	kfree_skb(reply);
2478 	return err;
2479 }
2480 
2481 static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
2482 {
2483 	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
2484 	struct datapath *dp;
2485 	int bucket = cb->args[0], skip = cb->args[1];
2486 	int i, j = 0;
2487 
2488 	rcu_read_lock();
2489 	dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
2490 	if (!dp) {
2491 		rcu_read_unlock();
2492 		return -ENODEV;
2493 	}
2494 	for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
2495 		struct vport *vport;
2496 
2497 		j = 0;
2498 		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
2499 			if (j >= skip &&
2500 			    ovs_vport_cmd_fill_info(vport, skb,
2501 						    sock_net(skb->sk),
2502 						    NETLINK_CB(cb->skb).portid,
2503 						    cb->nlh->nlmsg_seq,
2504 						    NLM_F_MULTI,
2505 						    OVS_VPORT_CMD_GET,
2506 						    GFP_ATOMIC) < 0)
2507 				goto out;
2508 
2509 			j++;
2510 		}
2511 		skip = 0;
2512 	}
2513 out:
2514 	rcu_read_unlock();
2515 
2516 	cb->args[0] = i;
2517 	cb->args[1] = j;
2518 
2519 	return skb->len;
2520 }
2521 
2522 static void ovs_dp_masks_rebalance(struct work_struct *work)
2523 {
2524 	struct ovs_net *ovs_net = container_of(work, struct ovs_net,
2525 					       masks_rebalance.work);
2526 	struct datapath *dp;
2527 
2528 	ovs_lock();
2529 
2530 	list_for_each_entry(dp, &ovs_net->dps, list_node)
2531 		ovs_flow_masks_rebalance(&dp->table);
2532 
2533 	ovs_unlock();
2534 
2535 	schedule_delayed_work(&ovs_net->masks_rebalance,
2536 			      msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
2537 }
2538 
/*
 * Netlink attribute policy of the vport family, indexed by OVS_VPORT_ATTR_*.
 * It is installed as dp_vport_genl_family.policy.
 */
static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
	/* NUL-terminated name of at most IFNAMSIZ - 1 characters. */
	[OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
	[OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
	[OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
	[OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
	[OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_UNSPEC },
	[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
	/* Signed 32-bit value that must not be negative. */
	[OVS_VPORT_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 0),
	[OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
	[OVS_VPORT_ATTR_UPCALL_STATS] = { .type = NLA_NESTED },
};
2550 
/*
 * Command table of the vport family.  NEW, DEL and SET require
 * GENL_UNS_ADMIN_PERM; GET (including its dump variant) carries no
 * permission flag.
 */
static const struct genl_small_ops dp_vport_genl_ops[] = {
	{ .cmd = OVS_VPORT_CMD_NEW,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_vport_cmd_new
	},
	{ .cmd = OVS_VPORT_CMD_DEL,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_vport_cmd_del
	},
	{ .cmd = OVS_VPORT_CMD_GET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = 0,		    /* OK for unprivileged users. */
	  .doit = ovs_vport_cmd_get,
	  .dumpit = ovs_vport_cmd_dump
	},
	{ .cmd = OVS_VPORT_CMD_SET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_vport_cmd_set,
	},
};
2574 
/*
 * Generic netlink family descriptor for the vport commands.  Not static:
 * the symbol has external linkage so other files of the module can refer
 * to it.
 *
 * Binds the family name and version to the command table
 * (dp_vport_genl_ops), the attribute policy (vport_policy) and the single
 * vport multicast group.
 */
struct genl_family dp_vport_genl_family __ro_after_init = {
	/* Every message of this family carries a struct ovs_header. */
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_VPORT_FAMILY,
	.version = OVS_VPORT_VERSION,
	.maxattr = OVS_VPORT_ATTR_MAX,
	.policy = vport_policy,
	.netnsok = true,
	.parallel_ops = true,
	.small_ops = dp_vport_genl_ops,
	.n_small_ops = ARRAY_SIZE(dp_vport_genl_ops),
	/* NOTE(review): presumably the first command number not yet in use
	 * by this family - confirm against the genetlink documentation.
	 */
	.resv_start_op = OVS_VPORT_CMD_SET + 1,
	.mcgrps = &ovs_dp_vport_multicast_group,
	.n_mcgrps = 1,
	.module = THIS_MODULE,
};
2590 
/*
 * Every generic netlink family provided by this module.  dp_register_genl()
 * registers them in array order and dp_unregister_genl() walks the same
 * array.  The conntrack-limit family is only included when
 * CONFIG_NETFILTER_CONNCOUNT is enabled.
 */
static struct genl_family * const dp_genl_families[] = {
	&dp_datapath_genl_family,
	&dp_vport_genl_family,
	&dp_flow_genl_family,
	&dp_packet_genl_family,
	&dp_meter_genl_family,
#if	IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
	&dp_ct_limit_genl_family,
#endif
};
2601 
2602 static void dp_unregister_genl(int n_families)
2603 {
2604 	int i;
2605 
2606 	for (i = 0; i < n_families; i++)
2607 		genl_unregister_family(dp_genl_families[i]);
2608 }
2609 
2610 static int __init dp_register_genl(void)
2611 {
2612 	int err;
2613 	int i;
2614 
2615 	for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
2616 
2617 		err = genl_register_family(dp_genl_families[i]);
2618 		if (err)
2619 			goto error;
2620 	}
2621 
2622 	return 0;
2623 
2624 error:
2625 	dp_unregister_genl(i);
2626 	return err;
2627 }
2628 
2629 static int __net_init ovs_init_net(struct net *net)
2630 {
2631 	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2632 	int err;
2633 
2634 	INIT_LIST_HEAD(&ovs_net->dps);
2635 	INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
2636 	INIT_DELAYED_WORK(&ovs_net->masks_rebalance, ovs_dp_masks_rebalance);
2637 
2638 	err = ovs_ct_init(net);
2639 	if (err)
2640 		return err;
2641 
2642 	schedule_delayed_work(&ovs_net->masks_rebalance,
2643 			      msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
2644 	return 0;
2645 }
2646 
2647 static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
2648 					    struct list_head *head)
2649 {
2650 	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2651 	struct datapath *dp;
2652 
2653 	list_for_each_entry(dp, &ovs_net->dps, list_node) {
2654 		int i;
2655 
2656 		for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
2657 			struct vport *vport;
2658 
2659 			hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) {
2660 				if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL)
2661 					continue;
2662 
2663 				if (dev_net(vport->dev) == dnet)
2664 					list_add(&vport->detach_list, head);
2665 			}
2666 		}
2667 	}
2668 }
2669 
/*
 * Per-namespace exit for the namespace @dnet that is going away.
 *
 * Under ovs_mutex: shut down conntrack support, destroy every datapath that
 * lives in @dnet, then collect (via list_vports_from_net()) and detach the
 * ports of datapaths in any namespace that are still bound to a device in
 * @dnet.  After the lock is dropped, the namespace's pending work items are
 * cancelled synchronously.
 */
static void __net_exit ovs_exit_net(struct net *dnet)
{
	struct datapath *dp, *dp_next;
	struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id);
	struct vport *vport, *vport_next;
	struct net *net;
	LIST_HEAD(head);

	ovs_lock();

	ovs_ct_exit(dnet);

	/* _safe iteration: the loop body tears down the current entry. */
	list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
		__dp_destroy(dp);

	/* net_rwsem is held for reading while all namespaces are walked. */
	down_read(&net_rwsem);
	for_each_net(net)
		list_vports_from_net(net, dnet, &head);
	up_read(&net_rwsem);

	/* Detach all vports from given namespace. */
	list_for_each_entry_safe(vport, vport_next, &head, detach_list) {
		list_del(&vport->detach_list);
		ovs_dp_detach_port(vport);
	}

	ovs_unlock();

	/* Both work items are cancelled only after ovs_mutex is released. */
	cancel_delayed_work_sync(&ovs_net->masks_rebalance);
	cancel_work_sync(&ovs_net->dp_notify_work);
}
2701 
/*
 * Per-network-namespace hooks of the module.  .id and .size describe the
 * struct ovs_net private area that ovs_init_net()/ovs_exit_net() obtain
 * through net_generic(net, ovs_net_id).
 */
static struct pernet_operations ovs_net_ops = {
	.init = ovs_init_net,
	.exit = ovs_exit_net,
	.id   = &ovs_net_id,
	.size = sizeof(struct ovs_net),
};
2708 
/*
 * Names of the OVS drop reasons.  The table is generated by expanding
 * OVS_DROP_REASONS() with a helper macro that stringifies each entry.
 */
static const char * const ovs_drop_reasons[] = {
#define S(x)	(#x),
	OVS_DROP_REASONS(S)
#undef S
};
2714 
/*
 * Drop-reason list handed to drop_reasons_register_subsys() in dp_init();
 * it wraps the ovs_drop_reasons string table and its length.
 */
static struct drop_reason_list drop_reason_list_ovs = {
	.reasons = ovs_drop_reasons,
	.n_reasons = ARRAY_SIZE(ovs_drop_reasons),
};
2719 
/*
 * Module init.  Brings up the subsystems one after another: action fifos,
 * the internal-device rtnl link ops, the flow and vport cores, the
 * per-namespace ops, the netdevice notifier, netdev vports and finally the
 * generic netlink families, followed by the drop-reason registration.
 *
 * When a step fails, every step that already succeeded is undone in reverse
 * order through the error_* labels and the error is returned.
 */
static int __init dp_init(void)
{
	int err;

	/* struct ovs_skb_cb has to fit into the skb control buffer. */
	BUILD_BUG_ON(sizeof(struct ovs_skb_cb) >
		     sizeof_field(struct sk_buff, cb));

	pr_info("Open vSwitch switching datapath\n");

	err = action_fifos_init();
	if (err)
		goto error;

	err = ovs_internal_dev_rtnl_link_register();
	if (err)
		goto error_action_fifos_exit;

	err = ovs_flow_init();
	if (err)
		goto error_unreg_rtnl_link;

	err = ovs_vport_init();
	if (err)
		goto error_flow_exit;

	err = register_pernet_device(&ovs_net_ops);
	if (err)
		goto error_vport_exit;

	err = register_netdevice_notifier(&ovs_dp_device_notifier);
	if (err)
		goto error_netns_exit;

	err = ovs_netdev_init();
	if (err)
		goto error_unreg_notifier;

	/* The netlink families are registered last of the fallible steps. */
	err = dp_register_genl();
	if (err < 0)
		goto error_unreg_netdev;

	drop_reasons_register_subsys(SKB_DROP_REASON_SUBSYS_OPENVSWITCH,
				     &drop_reason_list_ovs);

	return 0;

	/* Unwind in reverse order of the setup above. */
error_unreg_netdev:
	ovs_netdev_exit();
error_unreg_notifier:
	unregister_netdevice_notifier(&ovs_dp_device_notifier);
error_netns_exit:
	unregister_pernet_device(&ovs_net_ops);
error_vport_exit:
	ovs_vport_exit();
error_flow_exit:
	ovs_flow_exit();
error_unreg_rtnl_link:
	ovs_internal_dev_rtnl_link_unregister();
error_action_fifos_exit:
	action_fifos_exit();
error:
	return err;
}
2783 
/*
 * Module exit: undo everything dp_init() set up.  The netlink families are
 * removed first, then the netdev vports, the netdevice notifier, the
 * per-namespace ops and the drop-reason registration.
 */
static void dp_cleanup(void)
{
	dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
	ovs_netdev_exit();
	unregister_netdevice_notifier(&ovs_dp_device_notifier);
	unregister_pernet_device(&ovs_net_ops);
	drop_reasons_unregister_subsys(SKB_DROP_REASON_SUBSYS_OPENVSWITCH);
	/* NOTE(review): presumably waits for RCU callbacks queued by the
	 * teardown above before the remaining cores are shut down - confirm.
	 */
	rcu_barrier();
	ovs_vport_exit();
	ovs_flow_exit();
	ovs_internal_dev_rtnl_link_unregister();
	action_fifos_exit();
}
2797 
/* Module entry and exit points. */
module_init(dp_init);
module_exit(dp_cleanup);

MODULE_DESCRIPTION("Open vSwitch switching datapath");
MODULE_LICENSE("GPL");
/*
 * One alias per generic netlink family name.
 * NOTE(review): presumably these let the module be loaded on demand when one
 * of the families is requested by name - confirm against the genetlink docs.
 */
MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_METER_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_CT_LIMIT_FAMILY);
2809