xref: /openbmc/linux/net/openvswitch/vport.c (revision 7f8a436e)
1ccb1352eSJesse Gross /*
283c8df26SPravin B Shelar  * Copyright (c) 2007-2014 Nicira, Inc.
3ccb1352eSJesse Gross  *
4ccb1352eSJesse Gross  * This program is free software; you can redistribute it and/or
5ccb1352eSJesse Gross  * modify it under the terms of version 2 of the GNU General Public
6ccb1352eSJesse Gross  * License as published by the Free Software Foundation.
7ccb1352eSJesse Gross  *
8ccb1352eSJesse Gross  * This program is distributed in the hope that it will be useful, but
9ccb1352eSJesse Gross  * WITHOUT ANY WARRANTY; without even the implied warranty of
10ccb1352eSJesse Gross  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11ccb1352eSJesse Gross  * General Public License for more details.
12ccb1352eSJesse Gross  *
13ccb1352eSJesse Gross  * You should have received a copy of the GNU General Public License
14ccb1352eSJesse Gross  * along with this program; if not, write to the Free Software
15ccb1352eSJesse Gross  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16ccb1352eSJesse Gross  * 02110-1301, USA
17ccb1352eSJesse Gross  */
18ccb1352eSJesse Gross 
19ccb1352eSJesse Gross #include <linux/etherdevice.h>
20ccb1352eSJesse Gross #include <linux/if.h>
21ccb1352eSJesse Gross #include <linux/if_vlan.h>
2246df7b81SPravin B Shelar #include <linux/jhash.h>
23ccb1352eSJesse Gross #include <linux/kernel.h>
24ccb1352eSJesse Gross #include <linux/list.h>
25ccb1352eSJesse Gross #include <linux/mutex.h>
26ccb1352eSJesse Gross #include <linux/percpu.h>
27ccb1352eSJesse Gross #include <linux/rcupdate.h>
28ccb1352eSJesse Gross #include <linux/rtnetlink.h>
29ccb1352eSJesse Gross #include <linux/compat.h>
3046df7b81SPravin B Shelar #include <net/net_namespace.h>
3162b9c8d0SThomas Graf #include <linux/module.h>
32ccb1352eSJesse Gross 
3346df7b81SPravin B Shelar #include "datapath.h"
34ccb1352eSJesse Gross #include "vport.h"
35ccb1352eSJesse Gross #include "vport-internal_dev.h"
36ccb1352eSJesse Gross 
static void ovs_vport_record_error(struct vport *vport,
				   enum vport_err_type err_type);

/* Every registered vport_ops, linked through vport_ops->list. */
static LIST_HEAD(vport_ops_list);

/* Number of buckets in dev_table.  Must stay a power of two because
 * hash_bucket() reduces the hash with a (VPORT_HASH_BUCKETS - 1) mask.
 */
#define VPORT_HASH_BUCKETS 1024

/* Protected by RCU read lock for reading, ovs_mutex for writing. */
static struct hlist_head *dev_table;
45ccb1352eSJesse Gross 
46ccb1352eSJesse Gross /**
47ccb1352eSJesse Gross  *	ovs_vport_init - initialize vport subsystem
48ccb1352eSJesse Gross  *
49ccb1352eSJesse Gross  * Called at module load time to initialize the vport subsystem.
50ccb1352eSJesse Gross  */
51ccb1352eSJesse Gross int ovs_vport_init(void)
52ccb1352eSJesse Gross {
53ccb1352eSJesse Gross 	dev_table = kzalloc(VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
54ccb1352eSJesse Gross 			    GFP_KERNEL);
55ccb1352eSJesse Gross 	if (!dev_table)
56ccb1352eSJesse Gross 		return -ENOMEM;
57ccb1352eSJesse Gross 
58ccb1352eSJesse Gross 	return 0;
59ccb1352eSJesse Gross }
60ccb1352eSJesse Gross 
61ccb1352eSJesse Gross /**
62ccb1352eSJesse Gross  *	ovs_vport_exit - shutdown vport subsystem
63ccb1352eSJesse Gross  *
64ccb1352eSJesse Gross  * Called at module exit time to shutdown the vport subsystem.
65ccb1352eSJesse Gross  */
66ccb1352eSJesse Gross void ovs_vport_exit(void)
67ccb1352eSJesse Gross {
68ccb1352eSJesse Gross 	kfree(dev_table);
69ccb1352eSJesse Gross }
70ccb1352eSJesse Gross 
7112eb18f7SThomas Graf static struct hlist_head *hash_bucket(const struct net *net, const char *name)
72ccb1352eSJesse Gross {
7346df7b81SPravin B Shelar 	unsigned int hash = jhash(name, strlen(name), (unsigned long) net);
74ccb1352eSJesse Gross 	return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)];
75ccb1352eSJesse Gross }
76ccb1352eSJesse Gross 
7762b9c8d0SThomas Graf int ovs_vport_ops_register(struct vport_ops *ops)
7862b9c8d0SThomas Graf {
7962b9c8d0SThomas Graf 	int err = -EEXIST;
8062b9c8d0SThomas Graf 	struct vport_ops *o;
8162b9c8d0SThomas Graf 
8262b9c8d0SThomas Graf 	ovs_lock();
8362b9c8d0SThomas Graf 	list_for_each_entry(o, &vport_ops_list, list)
8462b9c8d0SThomas Graf 		if (ops->type == o->type)
8562b9c8d0SThomas Graf 			goto errout;
8662b9c8d0SThomas Graf 
8762b9c8d0SThomas Graf 	list_add_tail(&ops->list, &vport_ops_list);
8862b9c8d0SThomas Graf 	err = 0;
8962b9c8d0SThomas Graf errout:
9062b9c8d0SThomas Graf 	ovs_unlock();
9162b9c8d0SThomas Graf 	return err;
9262b9c8d0SThomas Graf }
939ba559d9SPravin B Shelar EXPORT_SYMBOL_GPL(ovs_vport_ops_register);
9462b9c8d0SThomas Graf 
9562b9c8d0SThomas Graf void ovs_vport_ops_unregister(struct vport_ops *ops)
9662b9c8d0SThomas Graf {
9762b9c8d0SThomas Graf 	ovs_lock();
9862b9c8d0SThomas Graf 	list_del(&ops->list);
9962b9c8d0SThomas Graf 	ovs_unlock();
10062b9c8d0SThomas Graf }
1019ba559d9SPravin B Shelar EXPORT_SYMBOL_GPL(ovs_vport_ops_unregister);
10262b9c8d0SThomas Graf 
103ccb1352eSJesse Gross /**
104ccb1352eSJesse Gross  *	ovs_vport_locate - find a port that has already been created
105ccb1352eSJesse Gross  *
106ccb1352eSJesse Gross  * @name: name of port to find
107ccb1352eSJesse Gross  *
1088e4e1713SPravin B Shelar  * Must be called with ovs or RCU read lock.
109ccb1352eSJesse Gross  */
11012eb18f7SThomas Graf struct vport *ovs_vport_locate(const struct net *net, const char *name)
111ccb1352eSJesse Gross {
11246df7b81SPravin B Shelar 	struct hlist_head *bucket = hash_bucket(net, name);
113ccb1352eSJesse Gross 	struct vport *vport;
114ccb1352eSJesse Gross 
115b67bfe0dSSasha Levin 	hlist_for_each_entry_rcu(vport, bucket, hash_node)
116c9db965cSThomas Graf 		if (!strcmp(name, ovs_vport_name(vport)) &&
11746df7b81SPravin B Shelar 		    net_eq(ovs_dp_get_net(vport->dp), net))
118ccb1352eSJesse Gross 			return vport;
119ccb1352eSJesse Gross 
120ccb1352eSJesse Gross 	return NULL;
121ccb1352eSJesse Gross }
122ccb1352eSJesse Gross 
123ccb1352eSJesse Gross /**
124ccb1352eSJesse Gross  *	ovs_vport_alloc - allocate and initialize new vport
125ccb1352eSJesse Gross  *
126ccb1352eSJesse Gross  * @priv_size: Size of private data area to allocate.
127ccb1352eSJesse Gross  * @ops: vport device ops
128ccb1352eSJesse Gross  *
129ccb1352eSJesse Gross  * Allocate and initialize a new vport defined by @ops.  The vport will contain
130ccb1352eSJesse Gross  * a private data area of size @priv_size that can be accessed using
131ccb1352eSJesse Gross  * vport_priv().  vports that are no longer needed should be released with
132ccb1352eSJesse Gross  * vport_free().
133ccb1352eSJesse Gross  */
134ccb1352eSJesse Gross struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
135ccb1352eSJesse Gross 			  const struct vport_parms *parms)
136ccb1352eSJesse Gross {
137ccb1352eSJesse Gross 	struct vport *vport;
138ccb1352eSJesse Gross 	size_t alloc_size;
139ccb1352eSJesse Gross 
140ccb1352eSJesse Gross 	alloc_size = sizeof(struct vport);
141ccb1352eSJesse Gross 	if (priv_size) {
142ccb1352eSJesse Gross 		alloc_size = ALIGN(alloc_size, VPORT_ALIGN);
143ccb1352eSJesse Gross 		alloc_size += priv_size;
144ccb1352eSJesse Gross 	}
145ccb1352eSJesse Gross 
146ccb1352eSJesse Gross 	vport = kzalloc(alloc_size, GFP_KERNEL);
147ccb1352eSJesse Gross 	if (!vport)
148ccb1352eSJesse Gross 		return ERR_PTR(-ENOMEM);
149ccb1352eSJesse Gross 
150ccb1352eSJesse Gross 	vport->dp = parms->dp;
151ccb1352eSJesse Gross 	vport->port_no = parms->port_no;
152ccb1352eSJesse Gross 	vport->ops = ops;
15315eac2a7SPravin B Shelar 	INIT_HLIST_NODE(&vport->dp_hash_node);
154ccb1352eSJesse Gross 
1553791b3f6SChristoph Jaeger 	if (ovs_vport_set_upcall_portids(vport, parms->upcall_portids)) {
1563791b3f6SChristoph Jaeger 		kfree(vport);
1575cd667b0SAlex Wang 		return ERR_PTR(-EINVAL);
1583791b3f6SChristoph Jaeger 	}
1595cd667b0SAlex Wang 
1601c213bd2SWANG Cong 	vport->percpu_stats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
161f0a98ae8SDan Carpenter 	if (!vport->percpu_stats) {
162f0a98ae8SDan Carpenter 		kfree(vport);
163ccb1352eSJesse Gross 		return ERR_PTR(-ENOMEM);
164f0a98ae8SDan Carpenter 	}
165ccb1352eSJesse Gross 
166ccb1352eSJesse Gross 	return vport;
167ccb1352eSJesse Gross }
1689ba559d9SPravin B Shelar EXPORT_SYMBOL_GPL(ovs_vport_alloc);
169ccb1352eSJesse Gross 
170ccb1352eSJesse Gross /**
171ccb1352eSJesse Gross  *	ovs_vport_free - uninitialize and free vport
172ccb1352eSJesse Gross  *
173ccb1352eSJesse Gross  * @vport: vport to free
174ccb1352eSJesse Gross  *
175ccb1352eSJesse Gross  * Frees a vport allocated with vport_alloc() when it is no longer needed.
176ccb1352eSJesse Gross  *
177ccb1352eSJesse Gross  * The caller must ensure that an RCU grace period has passed since the last
178ccb1352eSJesse Gross  * time @vport was in a datapath.
179ccb1352eSJesse Gross  */
180ccb1352eSJesse Gross void ovs_vport_free(struct vport *vport)
181ccb1352eSJesse Gross {
1825cd667b0SAlex Wang 	/* vport is freed from RCU callback or error path, Therefore
1835cd667b0SAlex Wang 	 * it is safe to use raw dereference.
1845cd667b0SAlex Wang 	 */
1855cd667b0SAlex Wang 	kfree(rcu_dereference_raw(vport->upcall_portids));
186ccb1352eSJesse Gross 	free_percpu(vport->percpu_stats);
187ccb1352eSJesse Gross 	kfree(vport);
188ccb1352eSJesse Gross }
1899ba559d9SPravin B Shelar EXPORT_SYMBOL_GPL(ovs_vport_free);
19062b9c8d0SThomas Graf 
19162b9c8d0SThomas Graf static struct vport_ops *ovs_vport_lookup(const struct vport_parms *parms)
19262b9c8d0SThomas Graf {
19362b9c8d0SThomas Graf 	struct vport_ops *ops;
19462b9c8d0SThomas Graf 
19562b9c8d0SThomas Graf 	list_for_each_entry(ops, &vport_ops_list, list)
19662b9c8d0SThomas Graf 		if (ops->type == parms->type)
19762b9c8d0SThomas Graf 			return ops;
19862b9c8d0SThomas Graf 
19962b9c8d0SThomas Graf 	return NULL;
20062b9c8d0SThomas Graf }
201ccb1352eSJesse Gross 
202ccb1352eSJesse Gross /**
203ccb1352eSJesse Gross  *	ovs_vport_add - add vport device (for kernel callers)
204ccb1352eSJesse Gross  *
205ccb1352eSJesse Gross  * @parms: Information about new vport.
206ccb1352eSJesse Gross  *
207ccb1352eSJesse Gross  * Creates a new vport with the specified configuration (which is dependent on
2088e4e1713SPravin B Shelar  * device type).  ovs_mutex must be held.
209ccb1352eSJesse Gross  */
210ccb1352eSJesse Gross struct vport *ovs_vport_add(const struct vport_parms *parms)
211ccb1352eSJesse Gross {
21262b9c8d0SThomas Graf 	struct vport_ops *ops;
213ccb1352eSJesse Gross 	struct vport *vport;
214ccb1352eSJesse Gross 
21562b9c8d0SThomas Graf 	ops = ovs_vport_lookup(parms);
21662b9c8d0SThomas Graf 	if (ops) {
21746df7b81SPravin B Shelar 		struct hlist_head *bucket;
21846df7b81SPravin B Shelar 
21962b9c8d0SThomas Graf 		if (!try_module_get(ops->owner))
22062b9c8d0SThomas Graf 			return ERR_PTR(-EAFNOSUPPORT);
22162b9c8d0SThomas Graf 
22262b9c8d0SThomas Graf 		vport = ops->create(parms);
223ccb1352eSJesse Gross 		if (IS_ERR(vport)) {
22462b9c8d0SThomas Graf 			module_put(ops->owner);
22562b9c8d0SThomas Graf 			return vport;
226ccb1352eSJesse Gross 		}
227ccb1352eSJesse Gross 
22846df7b81SPravin B Shelar 		bucket = hash_bucket(ovs_dp_get_net(vport->dp),
229c9db965cSThomas Graf 				     ovs_vport_name(vport));
23046df7b81SPravin B Shelar 		hlist_add_head_rcu(&vport->hash_node, bucket);
231ccb1352eSJesse Gross 		return vport;
232ccb1352eSJesse Gross 	}
233ccb1352eSJesse Gross 
23462b9c8d0SThomas Graf 	/* Unlock to attempt module load and return -EAGAIN if load
23562b9c8d0SThomas Graf 	 * was successful as we need to restart the port addition
23662b9c8d0SThomas Graf 	 * workflow.
23762b9c8d0SThomas Graf 	 */
23862b9c8d0SThomas Graf 	ovs_unlock();
23962b9c8d0SThomas Graf 	request_module("vport-type-%d", parms->type);
24062b9c8d0SThomas Graf 	ovs_lock();
241ccb1352eSJesse Gross 
24262b9c8d0SThomas Graf 	if (!ovs_vport_lookup(parms))
24362b9c8d0SThomas Graf 		return ERR_PTR(-EAFNOSUPPORT);
24462b9c8d0SThomas Graf 	else
24562b9c8d0SThomas Graf 		return ERR_PTR(-EAGAIN);
246ccb1352eSJesse Gross }
247ccb1352eSJesse Gross 
248ccb1352eSJesse Gross /**
249ccb1352eSJesse Gross  *	ovs_vport_set_options - modify existing vport device (for kernel callers)
250ccb1352eSJesse Gross  *
251ccb1352eSJesse Gross  * @vport: vport to modify.
2522694838dSJustin Pettit  * @options: New configuration.
253ccb1352eSJesse Gross  *
254ccb1352eSJesse Gross  * Modifies an existing device with the specified configuration (which is
2558e4e1713SPravin B Shelar  * dependent on device type).  ovs_mutex must be held.
256ccb1352eSJesse Gross  */
257ccb1352eSJesse Gross int ovs_vport_set_options(struct vport *vport, struct nlattr *options)
258ccb1352eSJesse Gross {
259ccb1352eSJesse Gross 	if (!vport->ops->set_options)
260ccb1352eSJesse Gross 		return -EOPNOTSUPP;
261ccb1352eSJesse Gross 	return vport->ops->set_options(vport, options);
262ccb1352eSJesse Gross }
263ccb1352eSJesse Gross 
264ccb1352eSJesse Gross /**
265ccb1352eSJesse Gross  *	ovs_vport_del - delete existing vport device
266ccb1352eSJesse Gross  *
267ccb1352eSJesse Gross  * @vport: vport to delete.
268ccb1352eSJesse Gross  *
269ccb1352eSJesse Gross  * Detaches @vport from its datapath and destroys it.  It is possible to fail
2708e4e1713SPravin B Shelar  * for reasons such as lack of memory.  ovs_mutex must be held.
271ccb1352eSJesse Gross  */
272ccb1352eSJesse Gross void ovs_vport_del(struct vport *vport)
273ccb1352eSJesse Gross {
2748e4e1713SPravin B Shelar 	ASSERT_OVSL();
275ccb1352eSJesse Gross 
276ccb1352eSJesse Gross 	hlist_del_rcu(&vport->hash_node);
27762b9c8d0SThomas Graf 	module_put(vport->ops->owner);
278fa2d8ff4SThomas Graf 	vport->ops->destroy(vport);
279ccb1352eSJesse Gross }
280ccb1352eSJesse Gross 
281ccb1352eSJesse Gross /**
282ccb1352eSJesse Gross  *	ovs_vport_get_stats - retrieve device stats
283ccb1352eSJesse Gross  *
284ccb1352eSJesse Gross  * @vport: vport from which to retrieve the stats
285ccb1352eSJesse Gross  * @stats: location to store stats
286ccb1352eSJesse Gross  *
287ccb1352eSJesse Gross  * Retrieves transmit, receive, and error stats for the given device.
288ccb1352eSJesse Gross  *
2898e4e1713SPravin B Shelar  * Must be called with ovs_mutex or rcu_read_lock.
290ccb1352eSJesse Gross  */
291ccb1352eSJesse Gross void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
292ccb1352eSJesse Gross {
293ccb1352eSJesse Gross 	int i;
294ccb1352eSJesse Gross 
295ccb1352eSJesse Gross 	memset(stats, 0, sizeof(*stats));
296ccb1352eSJesse Gross 
297ccb1352eSJesse Gross 	/* We potentially have 2 sources of stats that need to be combined:
298ccb1352eSJesse Gross 	 * those we have collected (split into err_stats and percpu_stats) from
299ccb1352eSJesse Gross 	 * set_stats() and device error stats from netdev->get_stats() (for
300ccb1352eSJesse Gross 	 * errors that happen  downstream and therefore aren't reported through
301ccb1352eSJesse Gross 	 * our vport_record_error() function).
302ccb1352eSJesse Gross 	 * Stats from first source are reported by ovs (OVS_VPORT_ATTR_STATS).
303ccb1352eSJesse Gross 	 * netdev-stats can be directly read over netlink-ioctl.
304ccb1352eSJesse Gross 	 */
305ccb1352eSJesse Gross 
306e403adedSLi RongQing 	stats->rx_errors  = atomic_long_read(&vport->err_stats.rx_errors);
307e403adedSLi RongQing 	stats->tx_errors  = atomic_long_read(&vport->err_stats.tx_errors);
308e403adedSLi RongQing 	stats->tx_dropped = atomic_long_read(&vport->err_stats.tx_dropped);
309e403adedSLi RongQing 	stats->rx_dropped = atomic_long_read(&vport->err_stats.rx_dropped);
310ccb1352eSJesse Gross 
311ccb1352eSJesse Gross 	for_each_possible_cpu(i) {
3128f84985fSLi RongQing 		const struct pcpu_sw_netstats *percpu_stats;
3138f84985fSLi RongQing 		struct pcpu_sw_netstats local_stats;
314ccb1352eSJesse Gross 		unsigned int start;
315ccb1352eSJesse Gross 
316ccb1352eSJesse Gross 		percpu_stats = per_cpu_ptr(vport->percpu_stats, i);
317ccb1352eSJesse Gross 
318ccb1352eSJesse Gross 		do {
31957a7744eSEric W. Biederman 			start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
320ccb1352eSJesse Gross 			local_stats = *percpu_stats;
32157a7744eSEric W. Biederman 		} while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));
322ccb1352eSJesse Gross 
323ccb1352eSJesse Gross 		stats->rx_bytes		+= local_stats.rx_bytes;
324ccb1352eSJesse Gross 		stats->rx_packets	+= local_stats.rx_packets;
325ccb1352eSJesse Gross 		stats->tx_bytes		+= local_stats.tx_bytes;
326ccb1352eSJesse Gross 		stats->tx_packets	+= local_stats.tx_packets;
327ccb1352eSJesse Gross 	}
328ccb1352eSJesse Gross }
329ccb1352eSJesse Gross 
330ccb1352eSJesse Gross /**
331ccb1352eSJesse Gross  *	ovs_vport_get_options - retrieve device options
332ccb1352eSJesse Gross  *
333ccb1352eSJesse Gross  * @vport: vport from which to retrieve the options.
334ccb1352eSJesse Gross  * @skb: sk_buff where options should be appended.
335ccb1352eSJesse Gross  *
336ccb1352eSJesse Gross  * Retrieves the configuration of the given device, appending an
337ccb1352eSJesse Gross  * %OVS_VPORT_ATTR_OPTIONS attribute that in turn contains nested
338ccb1352eSJesse Gross  * vport-specific attributes to @skb.
339ccb1352eSJesse Gross  *
340ccb1352eSJesse Gross  * Returns 0 if successful, -EMSGSIZE if @skb has insufficient room, or another
341ccb1352eSJesse Gross  * negative error code if a real error occurred.  If an error occurs, @skb is
342ccb1352eSJesse Gross  * left unmodified.
343ccb1352eSJesse Gross  *
3448e4e1713SPravin B Shelar  * Must be called with ovs_mutex or rcu_read_lock.
345ccb1352eSJesse Gross  */
346ccb1352eSJesse Gross int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
347ccb1352eSJesse Gross {
348ccb1352eSJesse Gross 	struct nlattr *nla;
3495d963352SThomas Graf 	int err;
3505d963352SThomas Graf 
3515d963352SThomas Graf 	if (!vport->ops->get_options)
3525d963352SThomas Graf 		return 0;
353ccb1352eSJesse Gross 
354ccb1352eSJesse Gross 	nla = nla_nest_start(skb, OVS_VPORT_ATTR_OPTIONS);
355ccb1352eSJesse Gross 	if (!nla)
356ccb1352eSJesse Gross 		return -EMSGSIZE;
357ccb1352eSJesse Gross 
3585d963352SThomas Graf 	err = vport->ops->get_options(vport, skb);
359ccb1352eSJesse Gross 	if (err) {
360ccb1352eSJesse Gross 		nla_nest_cancel(skb, nla);
361ccb1352eSJesse Gross 		return err;
362ccb1352eSJesse Gross 	}
363ccb1352eSJesse Gross 
364ccb1352eSJesse Gross 	nla_nest_end(skb, nla);
365ccb1352eSJesse Gross 	return 0;
366ccb1352eSJesse Gross }
367ccb1352eSJesse Gross 
368ccb1352eSJesse Gross /**
3695cd667b0SAlex Wang  *	ovs_vport_set_upcall_portids - set upcall portids of @vport.
3705cd667b0SAlex Wang  *
3715cd667b0SAlex Wang  * @vport: vport to modify.
3725cd667b0SAlex Wang  * @ids: new configuration, an array of port ids.
3735cd667b0SAlex Wang  *
3745cd667b0SAlex Wang  * Sets the vport's upcall_portids to @ids.
3755cd667b0SAlex Wang  *
3765cd667b0SAlex Wang  * Returns 0 if successful, -EINVAL if @ids is zero length or cannot be parsed
3775cd667b0SAlex Wang  * as an array of U32.
3785cd667b0SAlex Wang  *
3795cd667b0SAlex Wang  * Must be called with ovs_mutex.
3805cd667b0SAlex Wang  */
38112eb18f7SThomas Graf int ovs_vport_set_upcall_portids(struct vport *vport, const struct nlattr *ids)
3825cd667b0SAlex Wang {
3835cd667b0SAlex Wang 	struct vport_portids *old, *vport_portids;
3845cd667b0SAlex Wang 
3855cd667b0SAlex Wang 	if (!nla_len(ids) || nla_len(ids) % sizeof(u32))
3865cd667b0SAlex Wang 		return -EINVAL;
3875cd667b0SAlex Wang 
3885cd667b0SAlex Wang 	old = ovsl_dereference(vport->upcall_portids);
3895cd667b0SAlex Wang 
3905cd667b0SAlex Wang 	vport_portids = kmalloc(sizeof(*vport_portids) + nla_len(ids),
3915cd667b0SAlex Wang 				GFP_KERNEL);
3925cd667b0SAlex Wang 	if (!vport_portids)
3935cd667b0SAlex Wang 		return -ENOMEM;
3945cd667b0SAlex Wang 
3955cd667b0SAlex Wang 	vport_portids->n_ids = nla_len(ids) / sizeof(u32);
3965cd667b0SAlex Wang 	vport_portids->rn_ids = reciprocal_value(vport_portids->n_ids);
3975cd667b0SAlex Wang 	nla_memcpy(vport_portids->ids, ids, nla_len(ids));
3985cd667b0SAlex Wang 
3995cd667b0SAlex Wang 	rcu_assign_pointer(vport->upcall_portids, vport_portids);
4005cd667b0SAlex Wang 
4015cd667b0SAlex Wang 	if (old)
4025cd667b0SAlex Wang 		kfree_rcu(old, rcu);
4035cd667b0SAlex Wang 	return 0;
4045cd667b0SAlex Wang }
4055cd667b0SAlex Wang 
4065cd667b0SAlex Wang /**
4075cd667b0SAlex Wang  *	ovs_vport_get_upcall_portids - get the upcall_portids of @vport.
4085cd667b0SAlex Wang  *
4095cd667b0SAlex Wang  * @vport: vport from which to retrieve the portids.
4105cd667b0SAlex Wang  * @skb: sk_buff where portids should be appended.
4115cd667b0SAlex Wang  *
4125cd667b0SAlex Wang  * Retrieves the configuration of the given vport, appending the
4135cd667b0SAlex Wang  * %OVS_VPORT_ATTR_UPCALL_PID attribute which is the array of upcall
4145cd667b0SAlex Wang  * portids to @skb.
4155cd667b0SAlex Wang  *
4165cd667b0SAlex Wang  * Returns 0 if successful, -EMSGSIZE if @skb has insufficient room.
4175cd667b0SAlex Wang  * If an error occurs, @skb is left unmodified.  Must be called with
4185cd667b0SAlex Wang  * ovs_mutex or rcu_read_lock.
4195cd667b0SAlex Wang  */
4205cd667b0SAlex Wang int ovs_vport_get_upcall_portids(const struct vport *vport,
4215cd667b0SAlex Wang 				 struct sk_buff *skb)
4225cd667b0SAlex Wang {
4235cd667b0SAlex Wang 	struct vport_portids *ids;
4245cd667b0SAlex Wang 
4255cd667b0SAlex Wang 	ids = rcu_dereference_ovsl(vport->upcall_portids);
4265cd667b0SAlex Wang 
4275cd667b0SAlex Wang 	if (vport->dp->user_features & OVS_DP_F_VPORT_PIDS)
4285cd667b0SAlex Wang 		return nla_put(skb, OVS_VPORT_ATTR_UPCALL_PID,
4295cd667b0SAlex Wang 			       ids->n_ids * sizeof(u32), (void *)ids->ids);
4305cd667b0SAlex Wang 	else
4315cd667b0SAlex Wang 		return nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, ids->ids[0]);
4325cd667b0SAlex Wang }
4335cd667b0SAlex Wang 
4345cd667b0SAlex Wang /**
4355cd667b0SAlex Wang  *	ovs_vport_find_upcall_portid - find the upcall portid to send upcall.
4365cd667b0SAlex Wang  *
4375cd667b0SAlex Wang  * @vport: vport from which the missed packet is received.
4385cd667b0SAlex Wang  * @skb: skb that the missed packet was received.
4395cd667b0SAlex Wang  *
4405cd667b0SAlex Wang  * Uses the skb_get_hash() to select the upcall portid to send the
4415cd667b0SAlex Wang  * upcall.
4425cd667b0SAlex Wang  *
4435cd667b0SAlex Wang  * Returns the portid of the target socket.  Must be called with rcu_read_lock.
4445cd667b0SAlex Wang  */
4454e8febd0SFabian Frederick u32 ovs_vport_find_upcall_portid(const struct vport *vport, struct sk_buff *skb)
4465cd667b0SAlex Wang {
4475cd667b0SAlex Wang 	struct vport_portids *ids;
4485cd667b0SAlex Wang 	u32 ids_index;
4495cd667b0SAlex Wang 	u32 hash;
4505cd667b0SAlex Wang 
4514e8febd0SFabian Frederick 	ids = rcu_dereference(vport->upcall_portids);
4525cd667b0SAlex Wang 
4535cd667b0SAlex Wang 	if (ids->n_ids == 1 && ids->ids[0] == 0)
4545cd667b0SAlex Wang 		return 0;
4555cd667b0SAlex Wang 
4565cd667b0SAlex Wang 	hash = skb_get_hash(skb);
4575cd667b0SAlex Wang 	ids_index = hash - ids->n_ids * reciprocal_divide(hash, ids->rn_ids);
4585cd667b0SAlex Wang 	return ids->ids[ids_index];
4595cd667b0SAlex Wang }
4605cd667b0SAlex Wang 
4615cd667b0SAlex Wang /**
462ccb1352eSJesse Gross  *	ovs_vport_receive - pass up received packet to the datapath for processing
463ccb1352eSJesse Gross  *
464ccb1352eSJesse Gross  * @vport: vport that received the packet
465ccb1352eSJesse Gross  * @skb: skb that was received
4662694838dSJustin Pettit  * @tun_key: tunnel (if any) that carried packet
467ccb1352eSJesse Gross  *
468ccb1352eSJesse Gross  * Must be called with rcu_read_lock.  The packet cannot be shared and
469d176ca2aSCong Wang  * skb->data should point to the Ethernet header.
470ccb1352eSJesse Gross  */
4717d5437c7SPravin B Shelar void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
4721d8fff90SThomas Graf 		       const struct ip_tunnel_info *tun_info)
473ccb1352eSJesse Gross {
4748f84985fSLi RongQing 	struct pcpu_sw_netstats *stats;
4758c8b1b83SPravin B Shelar 	struct sw_flow_key key;
4768c8b1b83SPravin B Shelar 	int error;
477ccb1352eSJesse Gross 
478404f2f10SShan Wei 	stats = this_cpu_ptr(vport->percpu_stats);
479e0f0ecf3SPravin B Shelar 	u64_stats_update_begin(&stats->syncp);
480ccb1352eSJesse Gross 	stats->rx_packets++;
481df8a39deSJiri Pirko 	stats->rx_bytes += skb->len +
482df8a39deSJiri Pirko 			   (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
483e0f0ecf3SPravin B Shelar 	u64_stats_update_end(&stats->syncp);
484ccb1352eSJesse Gross 
48583c8df26SPravin B Shelar 	OVS_CB(skb)->input_vport = vport;
486f0b128c1SJesse Gross 	OVS_CB(skb)->egress_tun_info = NULL;
4877f8a436eSJoe Stringer 	OVS_CB(skb)->mru = 0;
4888c8b1b83SPravin B Shelar 	/* Extract flow from 'skb' into 'key'. */
489f0b128c1SJesse Gross 	error = ovs_flow_key_extract(tun_info, skb, &key);
4908c8b1b83SPravin B Shelar 	if (unlikely(error)) {
4918c8b1b83SPravin B Shelar 		kfree_skb(skb);
4928c8b1b83SPravin B Shelar 		return;
4938c8b1b83SPravin B Shelar 	}
4948c8b1b83SPravin B Shelar 	ovs_dp_process_packet(skb, &key);
495ccb1352eSJesse Gross }
4969ba559d9SPravin B Shelar EXPORT_SYMBOL_GPL(ovs_vport_receive);
497ccb1352eSJesse Gross 
498ccb1352eSJesse Gross /**
499ccb1352eSJesse Gross  *	ovs_vport_send - send a packet on a device
500ccb1352eSJesse Gross  *
501ccb1352eSJesse Gross  * @vport: vport on which to send the packet
502ccb1352eSJesse Gross  * @skb: skb to send
503ccb1352eSJesse Gross  *
5048e4e1713SPravin B Shelar  * Sends the given packet and returns the length of data sent.  Either ovs
505ccb1352eSJesse Gross  * lock or rcu_read_lock must be held.
506ccb1352eSJesse Gross  */
507ccb1352eSJesse Gross int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
508ccb1352eSJesse Gross {
509ccb1352eSJesse Gross 	int sent = vport->ops->send(vport, skb);
510ccb1352eSJesse Gross 
51191b7514cSPravin B Shelar 	if (likely(sent > 0)) {
5128f84985fSLi RongQing 		struct pcpu_sw_netstats *stats;
513ccb1352eSJesse Gross 
514404f2f10SShan Wei 		stats = this_cpu_ptr(vport->percpu_stats);
515ccb1352eSJesse Gross 
516e0f0ecf3SPravin B Shelar 		u64_stats_update_begin(&stats->syncp);
517ccb1352eSJesse Gross 		stats->tx_packets++;
518ccb1352eSJesse Gross 		stats->tx_bytes += sent;
519e0f0ecf3SPravin B Shelar 		u64_stats_update_end(&stats->syncp);
52091b7514cSPravin B Shelar 	} else if (sent < 0) {
52191b7514cSPravin B Shelar 		ovs_vport_record_error(vport, VPORT_E_TX_ERROR);
522997e068eSPravin B Shelar 	} else {
52391b7514cSPravin B Shelar 		ovs_vport_record_error(vport, VPORT_E_TX_DROPPED);
524997e068eSPravin B Shelar 	}
525ccb1352eSJesse Gross 	return sent;
526ccb1352eSJesse Gross }
527ccb1352eSJesse Gross 
528ccb1352eSJesse Gross /**
529ccb1352eSJesse Gross  *	ovs_vport_record_error - indicate device error to generic stats layer
530ccb1352eSJesse Gross  *
531ccb1352eSJesse Gross  * @vport: vport that encountered the error
532ccb1352eSJesse Gross  * @err_type: one of enum vport_err_type types to indicate the error type
533ccb1352eSJesse Gross  *
534ccb1352eSJesse Gross  * If using the vport generic stats layer indicate that an error of the given
535af784163SAndy Hill  * type has occurred.
536ccb1352eSJesse Gross  */
537443cd88cSStephen Hemminger static void ovs_vport_record_error(struct vport *vport,
538443cd88cSStephen Hemminger 				   enum vport_err_type err_type)
539ccb1352eSJesse Gross {
540ccb1352eSJesse Gross 	switch (err_type) {
541ccb1352eSJesse Gross 	case VPORT_E_RX_DROPPED:
542e403adedSLi RongQing 		atomic_long_inc(&vport->err_stats.rx_dropped);
543ccb1352eSJesse Gross 		break;
544ccb1352eSJesse Gross 
545ccb1352eSJesse Gross 	case VPORT_E_RX_ERROR:
546e403adedSLi RongQing 		atomic_long_inc(&vport->err_stats.rx_errors);
547ccb1352eSJesse Gross 		break;
548ccb1352eSJesse Gross 
549ccb1352eSJesse Gross 	case VPORT_E_TX_DROPPED:
550e403adedSLi RongQing 		atomic_long_inc(&vport->err_stats.tx_dropped);
551ccb1352eSJesse Gross 		break;
552ccb1352eSJesse Gross 
553ccb1352eSJesse Gross 	case VPORT_E_TX_ERROR:
554e403adedSLi RongQing 		atomic_long_inc(&vport->err_stats.tx_errors);
555ccb1352eSJesse Gross 		break;
556a2bf91b5SPeter Senna Tschudin 	}
557ccb1352eSJesse Gross 
558ccb1352eSJesse Gross }
559aa310701SPravin B Shelar 
560aa310701SPravin B Shelar static void free_vport_rcu(struct rcu_head *rcu)
561aa310701SPravin B Shelar {
562aa310701SPravin B Shelar 	struct vport *vport = container_of(rcu, struct vport, rcu);
563aa310701SPravin B Shelar 
564aa310701SPravin B Shelar 	ovs_vport_free(vport);
565aa310701SPravin B Shelar }
566aa310701SPravin B Shelar 
567aa310701SPravin B Shelar void ovs_vport_deferred_free(struct vport *vport)
568aa310701SPravin B Shelar {
569aa310701SPravin B Shelar 	if (!vport)
570aa310701SPravin B Shelar 		return;
571aa310701SPravin B Shelar 
572aa310701SPravin B Shelar 	call_rcu(&vport->rcu, free_vport_rcu);
573aa310701SPravin B Shelar }
5749ba559d9SPravin B Shelar EXPORT_SYMBOL_GPL(ovs_vport_deferred_free);
5758f0aad6fSWenyu Zhang 
5761d8fff90SThomas Graf int ovs_tunnel_get_egress_info(struct ip_tunnel_info *egress_tun_info,
5778f0aad6fSWenyu Zhang 			       struct net *net,
5781d8fff90SThomas Graf 			       const struct ip_tunnel_info *tun_info,
5798f0aad6fSWenyu Zhang 			       u8 ipproto,
5808f0aad6fSWenyu Zhang 			       u32 skb_mark,
5818f0aad6fSWenyu Zhang 			       __be16 tp_src,
5828f0aad6fSWenyu Zhang 			       __be16 tp_dst)
5838f0aad6fSWenyu Zhang {
5841d8fff90SThomas Graf 	const struct ip_tunnel_key *tun_key;
5858f0aad6fSWenyu Zhang 	struct rtable *rt;
5868f0aad6fSWenyu Zhang 	struct flowi4 fl;
5878f0aad6fSWenyu Zhang 
5888f0aad6fSWenyu Zhang 	if (unlikely(!tun_info))
5898f0aad6fSWenyu Zhang 		return -EINVAL;
5908f0aad6fSWenyu Zhang 
5911d8fff90SThomas Graf 	tun_key = &tun_info->key;
5928f0aad6fSWenyu Zhang 
5938f0aad6fSWenyu Zhang 	/* Route lookup to get srouce IP address.
5948f0aad6fSWenyu Zhang 	 * The process may need to be changed if the corresponding process
5958f0aad6fSWenyu Zhang 	 * in vports ops changed.
5968f0aad6fSWenyu Zhang 	 */
5973f4c1d87SFan Du 	rt = ovs_tunnel_route_lookup(net, tun_key, skb_mark, &fl, ipproto);
5988f0aad6fSWenyu Zhang 	if (IS_ERR(rt))
5998f0aad6fSWenyu Zhang 		return PTR_ERR(rt);
6008f0aad6fSWenyu Zhang 
6018f0aad6fSWenyu Zhang 	ip_rt_put(rt);
6028f0aad6fSWenyu Zhang 
6038f0aad6fSWenyu Zhang 	/* Generate egress_tun_info based on tun_info,
6048f0aad6fSWenyu Zhang 	 * saddr, tp_src and tp_dst
6058f0aad6fSWenyu Zhang 	 */
6061d8fff90SThomas Graf 	__ip_tunnel_info_init(egress_tun_info,
607c1ea5d67SJiri Benc 			      fl.saddr, tun_key->u.ipv4.dst,
6087c383fb2SJiri Benc 			      tun_key->tos,
6097c383fb2SJiri Benc 			      tun_key->ttl,
6108f0aad6fSWenyu Zhang 			      tp_src, tp_dst,
6118f0aad6fSWenyu Zhang 			      tun_key->tun_id,
6128f0aad6fSWenyu Zhang 			      tun_key->tun_flags,
6138f0aad6fSWenyu Zhang 			      tun_info->options,
6148f0aad6fSWenyu Zhang 			      tun_info->options_len);
6158f0aad6fSWenyu Zhang 
6168f0aad6fSWenyu Zhang 	return 0;
6178f0aad6fSWenyu Zhang }
6188f0aad6fSWenyu Zhang EXPORT_SYMBOL_GPL(ovs_tunnel_get_egress_info);
6198f0aad6fSWenyu Zhang 
6208f0aad6fSWenyu Zhang int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
6211d8fff90SThomas Graf 				  struct ip_tunnel_info *info)
6228f0aad6fSWenyu Zhang {
6238f0aad6fSWenyu Zhang 	/* get_egress_tun_info() is only implemented on tunnel ports. */
6248f0aad6fSWenyu Zhang 	if (unlikely(!vport->ops->get_egress_tun_info))
6258f0aad6fSWenyu Zhang 		return -EINVAL;
6268f0aad6fSWenyu Zhang 
6278f0aad6fSWenyu Zhang 	return vport->ops->get_egress_tun_info(vport, skb, info);
6288f0aad6fSWenyu Zhang }
629