xref: /openbmc/linux/net/openvswitch/datapath.h (revision 35d39fec)
1c9422999SThomas Gleixner /* SPDX-License-Identifier: GPL-2.0-only */
2ccb1352eSJesse Gross /*
3971427f3SAndy Zhou  * Copyright (c) 2007-2014 Nicira, Inc.
4ccb1352eSJesse Gross  */
5ccb1352eSJesse Gross 
6ccb1352eSJesse Gross #ifndef DATAPATH_H
7ccb1352eSJesse Gross #define DATAPATH_H 1
8ccb1352eSJesse Gross 
9ccb1352eSJesse Gross #include <asm/page.h>
10ccb1352eSJesse Gross #include <linux/kernel.h>
11ccb1352eSJesse Gross #include <linux/mutex.h>
12ccb1352eSJesse Gross #include <linux/netdevice.h>
13ccb1352eSJesse Gross #include <linux/skbuff.h>
14ccb1352eSJesse Gross #include <linux/u64_stats_sync.h>
151d8fff90SThomas Graf #include <net/ip_tunnels.h>
16ccb1352eSJesse Gross 
177f8a436eSJoe Stringer #include "conntrack.h"
18ccb1352eSJesse Gross #include "flow.h"
19e6445719SPravin B Shelar #include "flow_table.h"
20cd8a6c33SAndy Zhou #include "meter.h"
219602c01eSAndy Zhou #include "vport-internal_dev.h"
22ccb1352eSJesse Gross 
/* Upper bound used for datapath port numbering. */
#define DP_MAX_PORTS                USHRT_MAX
/* NOTE(review): presumably the bucket count of the per-datapath port hash
 * table (struct datapath::ports) -- confirm against datapath.c.
 */
#define DP_VPORT_HASH_BUCKETS       1024
/* NOTE(review): presumably the period of the flow-mask rebalance work
 * (struct ovs_net::masks_rebalance); the unit is not visible here -- confirm.
 */
#define DP_MASKS_REBALANCE_INTERVAL 4000
2615eac2a7SPravin B Shelar 
27ccb1352eSJesse Gross /**
28ccb1352eSJesse Gross  * struct dp_stats_percpu - per-cpu packet processing statistics for a given
29ccb1352eSJesse Gross  * datapath.
30ccb1352eSJesse Gross  * @n_hit: Number of received packets for which a matching flow was found in
31ccb1352eSJesse Gross  * the flow table.
32ccb1352eSJesse Gross  * @n_miss: Number of received packets that had no matching flow in the flow
33ccb1352eSJesse Gross  * table.  The sum of @n_hit and @n_miss is the number of packets that have
34ccb1352eSJesse Gross  * been received by the datapath.
35ccb1352eSJesse Gross  * @n_lost: Number of received packets that had no matching flow in the flow
36ccb1352eSJesse Gross  * table that could not be sent to userspace (normally due to an overflow in
37ccb1352eSJesse Gross  * one of the datapath's queues).
381bd7116fSAndy Zhou  * @n_mask_hit: Number of masks looked up for flow match.
391bd7116fSAndy Zhou  *   @n_mask_hit / (@n_hit + @n_missed)  will be the average masks looked
401bd7116fSAndy Zhou  *   up per packet.
419d2f627bSEelco Chaudron  * @n_cache_hit: The number of received packets that had their mask found using
429d2f627bSEelco Chaudron  * the mask cache.
43ccb1352eSJesse Gross  */
44ccb1352eSJesse Gross struct dp_stats_percpu {
45ccb1352eSJesse Gross 	u64 n_hit;
46ccb1352eSJesse Gross 	u64 n_missed;
47ccb1352eSJesse Gross 	u64 n_lost;
481bd7116fSAndy Zhou 	u64 n_mask_hit;
499d2f627bSEelco Chaudron 	u64 n_cache_hit;
50df9d9fdfSWANG Cong 	struct u64_stats_sync syncp;
51ccb1352eSJesse Gross };
52ccb1352eSJesse Gross 
53ccb1352eSJesse Gross /**
54*b83d23a2SMark Gray  * struct dp_nlsk_pids - array of netlink portids of for a datapath.
55*b83d23a2SMark Gray  *                       This is used when OVS_DP_F_DISPATCH_UPCALL_PER_CPU
56*b83d23a2SMark Gray  *                       is enabled and must be protected by rcu.
57*b83d23a2SMark Gray  * @rcu: RCU callback head for deferred destruction.
58*b83d23a2SMark Gray  * @n_pids: Size of @pids array.
59*b83d23a2SMark Gray  * @pids: Array storing the Netlink socket PIDs indexed by CPU ID for packets
60*b83d23a2SMark Gray  *       that miss the flow table.
61*b83d23a2SMark Gray  */
62*b83d23a2SMark Gray struct dp_nlsk_pids {
63*b83d23a2SMark Gray 	struct rcu_head rcu;
64*b83d23a2SMark Gray 	u32 n_pids;
65*b83d23a2SMark Gray 	u32 pids[];
66*b83d23a2SMark Gray };
67*b83d23a2SMark Gray 
68*b83d23a2SMark Gray /**
69ccb1352eSJesse Gross  * struct datapath - datapath for flow-based packet switching
70ccb1352eSJesse Gross  * @rcu: RCU callback head for deferred destruction.
71ccb1352eSJesse Gross  * @list_node: Element in global 'dps' list.
72b637e498SPravin B Shelar  * @table: flow table.
7315eac2a7SPravin B Shelar  * @ports: Hash table for ports.  %OVSP_LOCAL port always exists.  Protected by
748e4e1713SPravin B Shelar  * ovs_mutex and RCU.
75ccb1352eSJesse Gross  * @stats_percpu: Per-CPU datapath statistics.
7646df7b81SPravin B Shelar  * @net: Reference to net namespace.
773a927bc7SPaolo Abeni  * @max_headroom: the maximum headroom of all vports in this datapath; it will
783a927bc7SPaolo Abeni  * be used by all the internal vports in this dp.
79*b83d23a2SMark Gray  * @upcall_portids: RCU protected 'struct dp_nlsk_pids'.
80ccb1352eSJesse Gross  *
81ccb1352eSJesse Gross  * Context: See the comment on locking at the top of datapath.c for additional
82ccb1352eSJesse Gross  * locking information.
83ccb1352eSJesse Gross  */
84ccb1352eSJesse Gross struct datapath {
85ccb1352eSJesse Gross 	struct rcu_head rcu;
86ccb1352eSJesse Gross 	struct list_head list_node;
87ccb1352eSJesse Gross 
88ccb1352eSJesse Gross 	/* Flow table. */
89b637e498SPravin B Shelar 	struct flow_table table;
90ccb1352eSJesse Gross 
91ccb1352eSJesse Gross 	/* Switch ports. */
9215eac2a7SPravin B Shelar 	struct hlist_head *ports;
93ccb1352eSJesse Gross 
94ccb1352eSJesse Gross 	/* Stats. */
95ccb1352eSJesse Gross 	struct dp_stats_percpu __percpu *stats_percpu;
9646df7b81SPravin B Shelar 
9746df7b81SPravin B Shelar 	/* Network namespace ref. */
980c5c9fb5SEric W. Biederman 	possible_net_t net;
9943d4be9cSThomas Graf 
10043d4be9cSThomas Graf 	u32 user_features;
1013a927bc7SPaolo Abeni 
1023a927bc7SPaolo Abeni 	u32 max_headroom;
10396fbc13dSAndy Zhou 
10496fbc13dSAndy Zhou 	/* Switch meters. */
105c7c4c44cSTonghao Zhang 	struct dp_meter_table meter_tbl;
106*b83d23a2SMark Gray 
107*b83d23a2SMark Gray 	struct dp_nlsk_pids __rcu *upcall_portids;
108ccb1352eSJesse Gross };
109ccb1352eSJesse Gross 
110ccb1352eSJesse Gross /**
111ccb1352eSJesse Gross  * struct ovs_skb_cb - OVS data in skb CB
11283c8df26SPravin B Shelar  * @input_vport: The original vport packet came in on. This value is cached
11383c8df26SPravin B Shelar  * when a packet is received by OVS.
1147f8a436eSJoe Stringer  * @mru: The maximum received fragement size; 0 if the packet is not
1157f8a436eSJoe Stringer  * fragmented.
116494bea39SLiping Zhang  * @acts_origlen: The netlink size of the flow actions applied to this skb.
11752427fa0SDaniel Axtens  * @cutlen: The number of bytes from the packet end to be removed.
118ccb1352eSJesse Gross  */
119ccb1352eSJesse Gross struct ovs_skb_cb {
12083c8df26SPravin B Shelar 	struct vport		*input_vport;
1217f8a436eSJoe Stringer 	u16			mru;
122494bea39SLiping Zhang 	u16			acts_origlen;
123f2a4d086SWilliam Tu 	u32			cutlen;
124ccb1352eSJesse Gross };
125ccb1352eSJesse Gross #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
126ccb1352eSJesse Gross 
127ccb1352eSJesse Gross /**
128ccb1352eSJesse Gross  * struct dp_upcall - metadata to include with a packet to send to userspace
129ccb1352eSJesse Gross  * @cmd: One of %OVS_PACKET_CMD_*.
1304490108bSBen Pfaff  * @userdata: If nonnull, its variable-length value is passed to userspace as
131ccb1352eSJesse Gross  * %OVS_PACKET_ATTR_USERDATA.
132e8eedb85SPravin B Shelar  * @portid: Netlink portid to which packet should be sent.  If @portid is 0
133e8eedb85SPravin B Shelar  * then no packet is sent and the packet is accounted in the datapath's @n_lost
134ccb1352eSJesse Gross  * counter.
1358f0aad6fSWenyu Zhang  * @egress_tun_info: If nonnull, becomes %OVS_PACKET_ATTR_EGRESS_TUN_KEY.
1367f8a436eSJoe Stringer  * @mru: If not zero, Maximum received IP fragment size.
137ccb1352eSJesse Gross  */
138ccb1352eSJesse Gross struct dp_upcall_info {
1394c222798SPravin B Shelar 	struct ip_tunnel_info *egress_tun_info;
140ccb1352eSJesse Gross 	const struct nlattr *userdata;
141ccea7445SNeil McKee 	const struct nlattr *actions;
142ccea7445SNeil McKee 	int actions_len;
14315e47304SEric W. Biederman 	u32 portid;
144e8eedb85SPravin B Shelar 	u8 cmd;
1457f8a436eSJoe Stringer 	u16 mru;
146ccb1352eSJesse Gross };
147ccb1352eSJesse Gross 
1488e4e1713SPravin B Shelar /**
1498e4e1713SPravin B Shelar  * struct ovs_net - Per net-namespace data for ovs.
1508e4e1713SPravin B Shelar  * @dps: List of datapaths to enable dumping them all out.
1518e4e1713SPravin B Shelar  * Protected by genl_mutex.
1528e4e1713SPravin B Shelar  */
1538e4e1713SPravin B Shelar struct ovs_net {
1548e4e1713SPravin B Shelar 	struct list_head dps;
1558e4e1713SPravin B Shelar 	struct work_struct dp_notify_work;
156a65878d6SEelco Chaudron 	struct delayed_work masks_rebalance;
15711efd5cbSYi-Hung Wei #if	IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
15811efd5cbSYi-Hung Wei 	struct ovs_ct_limit_info *ct_limit_info;
15911efd5cbSYi-Hung Wei #endif
160c2ac6673SJoe Stringer 
161c2ac6673SJoe Stringer 	/* Module reference for configuring conntrack. */
162c2ac6673SJoe Stringer 	bool xt_label;
1638e4e1713SPravin B Shelar };
1648e4e1713SPravin B Shelar 
/**
 * enum ovs_pkt_hash_types - hash info to include with a packet
 * to send to userspace.
 * @OVS_PACKET_HASH_SW_BIT: indicates hash was computed in software stack.
 * @OVS_PACKET_HASH_L4_BIT: indicates hash is a canonical 4-tuple hash
 * over transport ports.
 *
 * Both flags sit above bit 31, so they only fit in a 64-bit value.  The
 * enumerators therefore exceed the range of int, which relies on the
 * compiler accepting wider enumeration constants.
 */
enum ovs_pkt_hash_types {
	OVS_PACKET_HASH_SW_BIT = (1ULL << 32),
	OVS_PACKET_HASH_L4_BIT = (1ULL << 33),
};
176bd1903b7STonghao Zhang 
/* NOTE(review): presumably the pernet id used to look up struct ovs_net --
 * confirm against datapath.c.
 */
extern unsigned int ovs_net_id;

/* Acquire and release the OVS lock; defined outside this header. */
void ovs_lock(void);
void ovs_unlock(void);
1808e4e1713SPravin B Shelar 
/* Without lockdep there is no way to check lock ownership, so the check
 * collapses to a constant "held".
 */
#ifdef CONFIG_LOCKDEP
int lockdep_ovsl_is_held(void);
#else
#define lockdep_ovsl_is_held()	1
#endif

/* Warn when the OVS lock is not held. */
#define ASSERT_OVSL()		WARN_ON(!lockdep_ovsl_is_held())

/* Dereference an RCU pointer on the update side, with the OVS lock held. */
#define ovsl_dereference(p)					\
	rcu_dereference_protected(p, lockdep_ovsl_is_held())

/* Dereference an RCU pointer from a context holding either the RCU read
 * lock or the OVS lock.
 */
#define rcu_dereference_ovsl(p)					\
	rcu_dereference_check(p, lockdep_ovsl_is_held())
1928e4e1713SPravin B Shelar 
ovs_dp_get_net(const struct datapath * dp)19312eb18f7SThomas Graf static inline struct net *ovs_dp_get_net(const struct datapath *dp)
19446df7b81SPravin B Shelar {
19546df7b81SPravin B Shelar 	return read_pnet(&dp->net);
19646df7b81SPravin B Shelar }
19746df7b81SPravin B Shelar 
ovs_dp_set_net(struct datapath * dp,struct net * net)19846df7b81SPravin B Shelar static inline void ovs_dp_set_net(struct datapath *dp, struct net *net)
19946df7b81SPravin B Shelar {
20046df7b81SPravin B Shelar 	write_pnet(&dp->net, net);
20146df7b81SPravin B Shelar }
20246df7b81SPravin B Shelar 
2038e4e1713SPravin B Shelar struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no);
2048e4e1713SPravin B Shelar 
ovs_vport_rcu(const struct datapath * dp,int port_no)2058e4e1713SPravin B Shelar static inline struct vport *ovs_vport_rcu(const struct datapath *dp, int port_no)
2068e4e1713SPravin B Shelar {
2078e4e1713SPravin B Shelar 	WARN_ON_ONCE(!rcu_read_lock_held());
2088e4e1713SPravin B Shelar 	return ovs_lookup_vport(dp, port_no);
2098e4e1713SPravin B Shelar }
2108e4e1713SPravin B Shelar 
/* Look up port @port_no in @dp; warns once if neither the RCU read lock nor
 * the OVS lock is held.
 */
static inline struct vport *ovs_vport_ovsl_rcu(const struct datapath *dp, int port_no)
{
	WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
	return ovs_lookup_vport(dp, port_no);
}
2168e4e1713SPravin B Shelar 
/* Look up port @port_no in @dp; warns if the OVS lock is not held. */
static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_no)
{
	ASSERT_OVSL();
	return ovs_lookup_vport(dp, port_no);
}
2228e4e1713SPravin B Shelar 
2239602c01eSAndy Zhou /* Must be called with rcu_read_lock. */
get_dp_rcu(struct net * net,int dp_ifindex)2249602c01eSAndy Zhou static inline struct datapath *get_dp_rcu(struct net *net, int dp_ifindex)
2259602c01eSAndy Zhou {
2269602c01eSAndy Zhou 	struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex);
2279602c01eSAndy Zhou 
2289602c01eSAndy Zhou 	if (dev) {
2299602c01eSAndy Zhou 		struct vport *vport = ovs_internal_dev_get_vport(dev);
2309602c01eSAndy Zhou 
2319602c01eSAndy Zhou 		if (vport)
2329602c01eSAndy Zhou 			return vport->dp;
2339602c01eSAndy Zhou 	}
2349602c01eSAndy Zhou 
2359602c01eSAndy Zhou 	return NULL;
2369602c01eSAndy Zhou }
2379602c01eSAndy Zhou 
/* The caller must hold either ovs_mutex or rcu_read_lock to keep the
 * returned dp pointer valid.
 *
 * Returns the datapath whose device has index @dp_ifindex in @net, or NULL
 * if the lookup fails.
 */
static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
{
	struct datapath *dp;

	WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());

	/* This read-side section only covers the lookup itself; the pointer
	 * stays valid afterwards because of the lock the caller holds.
	 */
	rcu_read_lock();
	dp = get_dp_rcu(net, dp_ifindex);
	rcu_read_unlock();

	return dp;
}
2529602c01eSAndy Zhou 
253ccb1352eSJesse Gross extern struct notifier_block ovs_dp_device_notifier;
25468eb5503SJohannes Berg extern struct genl_family dp_vport_genl_family;
255ccb1352eSJesse Gross 
2568c8b1b83SPravin B Shelar void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key);
257ccb1352eSJesse Gross void ovs_dp_detach_port(struct vport *);
258ccb1352eSJesse Gross int ovs_dp_upcall(struct datapath *, struct sk_buff *,
259f2a4d086SWilliam Tu 		  const struct sw_flow_key *, const struct dp_upcall_info *,
260f2a4d086SWilliam Tu 		  uint32_t cutlen);
261ccb1352eSJesse Gross 
262*b83d23a2SMark Gray u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id);
263*b83d23a2SMark Gray 
264971427f3SAndy Zhou const char *ovs_dp_name(const struct datapath *dp);
2659354d452SJiri Benc struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
2669354d452SJiri Benc 					 u32 portid, u32 seq, u8 cmd);
267ccb1352eSJesse Gross 
2682ff3e4e4SPravin B Shelar int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
26912eb18f7SThomas Graf 			const struct sw_flow_actions *, struct sw_flow_key *);
270971427f3SAndy Zhou 
2718e4e1713SPravin B Shelar void ovs_dp_notify_wq(struct work_struct *work);
27203f0d916SAndy Zhou 
273971427f3SAndy Zhou int action_fifos_init(void);
274971427f3SAndy Zhou void action_fifos_exit(void);
275971427f3SAndy Zhou 
/* 'KEY' must not have any bits set outside of the 'MASK' */

/* Value of OLD with the bits selected by MASK replaced by KEY. */
#define OVS_MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK)))

/* In-place form of OVS_MASKED(): OLD must be an lvalue and is evaluated
 * twice, so it must not have side effects.
 */
#define OVS_SET_MASKED(OLD, KEY, MASK) ((OLD) = OVS_MASKED(OLD, KEY, MASK))
279be26b9a8SJoe Stringer 
/* Log a rate-limited "netlink: ..." message at info level.
 *
 * @logging_allowed: expression gating the message; when false nothing is
 * logged and net_ratelimit() is not consulted.
 * @fmt: printf-style format string literal (a trailing newline is appended).
 *
 * The gate is parenthesized so that a compound argument such as 'a || b' is
 * evaluated as a whole before being combined with the rate limit.
 */
#define OVS_NLERR(logging_allowed, fmt, ...)			\
do {								\
	if ((logging_allowed) && net_ratelimit())		\
		pr_info("netlink: " fmt "\n", ##__VA_ARGS__);	\
} while (0)
285ccb1352eSJesse Gross #endif /* datapath.h */
286