xref: /openbmc/linux/net/openvswitch/datapath.h (revision eac87c41)
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2007-2014 Nicira, Inc.
 */
5ccb1352eSJesse Gross 
6ccb1352eSJesse Gross #ifndef DATAPATH_H
7ccb1352eSJesse Gross #define DATAPATH_H 1
8ccb1352eSJesse Gross 
9ccb1352eSJesse Gross #include <asm/page.h>
10ccb1352eSJesse Gross #include <linux/kernel.h>
11ccb1352eSJesse Gross #include <linux/mutex.h>
12ccb1352eSJesse Gross #include <linux/netdevice.h>
13ccb1352eSJesse Gross #include <linux/skbuff.h>
14ccb1352eSJesse Gross #include <linux/u64_stats_sync.h>
151d8fff90SThomas Graf #include <net/ip_tunnels.h>
16ccb1352eSJesse Gross 
177f8a436eSJoe Stringer #include "conntrack.h"
18ccb1352eSJesse Gross #include "flow.h"
19e6445719SPravin B Shelar #include "flow_table.h"
20cd8a6c33SAndy Zhou #include "meter.h"
219602c01eSAndy Zhou #include "vport-internal_dev.h"
22ccb1352eSJesse Gross 
/* Datapath-wide limits and tunables. */

/* Upper bound on port numbers: the largest value an unsigned short holds. */
#define DP_MAX_PORTS                USHRT_MAX
/* NOTE(review): presumably the bucket count of datapath::ports; confirm. */
#define DP_VPORT_HASH_BUCKETS       1024
/* NOTE(review): unit is not visible in this header (presumably ms); confirm
 * where the masks_rebalance delayed work is scheduled.
 */
#define DP_MASKS_REBALANCE_INTERVAL 4000
2615eac2a7SPravin B Shelar 
27ccb1352eSJesse Gross /**
28ccb1352eSJesse Gross  * struct dp_stats_percpu - per-cpu packet processing statistics for a given
29ccb1352eSJesse Gross  * datapath.
30ccb1352eSJesse Gross  * @n_hit: Number of received packets for which a matching flow was found in
31ccb1352eSJesse Gross  * the flow table.
32ccb1352eSJesse Gross  * @n_miss: Number of received packets that had no matching flow in the flow
33ccb1352eSJesse Gross  * table.  The sum of @n_hit and @n_miss is the number of packets that have
34ccb1352eSJesse Gross  * been received by the datapath.
35ccb1352eSJesse Gross  * @n_lost: Number of received packets that had no matching flow in the flow
36ccb1352eSJesse Gross  * table that could not be sent to userspace (normally due to an overflow in
37ccb1352eSJesse Gross  * one of the datapath's queues).
381bd7116fSAndy Zhou  * @n_mask_hit: Number of masks looked up for flow match.
391bd7116fSAndy Zhou  *   @n_mask_hit / (@n_hit + @n_missed)  will be the average masks looked
401bd7116fSAndy Zhou  *   up per packet.
41ccb1352eSJesse Gross  */
42ccb1352eSJesse Gross struct dp_stats_percpu {
43ccb1352eSJesse Gross 	u64 n_hit;
44ccb1352eSJesse Gross 	u64 n_missed;
45ccb1352eSJesse Gross 	u64 n_lost;
461bd7116fSAndy Zhou 	u64 n_mask_hit;
47df9d9fdfSWANG Cong 	struct u64_stats_sync syncp;
48ccb1352eSJesse Gross };
49ccb1352eSJesse Gross 
50ccb1352eSJesse Gross /**
51ccb1352eSJesse Gross  * struct datapath - datapath for flow-based packet switching
52ccb1352eSJesse Gross  * @rcu: RCU callback head for deferred destruction.
53ccb1352eSJesse Gross  * @list_node: Element in global 'dps' list.
54b637e498SPravin B Shelar  * @table: flow table.
5515eac2a7SPravin B Shelar  * @ports: Hash table for ports.  %OVSP_LOCAL port always exists.  Protected by
568e4e1713SPravin B Shelar  * ovs_mutex and RCU.
57ccb1352eSJesse Gross  * @stats_percpu: Per-CPU datapath statistics.
5846df7b81SPravin B Shelar  * @net: Reference to net namespace.
593a927bc7SPaolo Abeni  * @max_headroom: the maximum headroom of all vports in this datapath; it will
603a927bc7SPaolo Abeni  * be used by all the internal vports in this dp.
61ccb1352eSJesse Gross  *
62ccb1352eSJesse Gross  * Context: See the comment on locking at the top of datapath.c for additional
63ccb1352eSJesse Gross  * locking information.
64ccb1352eSJesse Gross  */
65ccb1352eSJesse Gross struct datapath {
66ccb1352eSJesse Gross 	struct rcu_head rcu;
67ccb1352eSJesse Gross 	struct list_head list_node;
68ccb1352eSJesse Gross 
69ccb1352eSJesse Gross 	/* Flow table. */
70b637e498SPravin B Shelar 	struct flow_table table;
71ccb1352eSJesse Gross 
72ccb1352eSJesse Gross 	/* Switch ports. */
7315eac2a7SPravin B Shelar 	struct hlist_head *ports;
74ccb1352eSJesse Gross 
75ccb1352eSJesse Gross 	/* Stats. */
76ccb1352eSJesse Gross 	struct dp_stats_percpu __percpu *stats_percpu;
7746df7b81SPravin B Shelar 
7846df7b81SPravin B Shelar 	/* Network namespace ref. */
790c5c9fb5SEric W. Biederman 	possible_net_t net;
8043d4be9cSThomas Graf 
8143d4be9cSThomas Graf 	u32 user_features;
823a927bc7SPaolo Abeni 
833a927bc7SPaolo Abeni 	u32 max_headroom;
8496fbc13dSAndy Zhou 
8596fbc13dSAndy Zhou 	/* Switch meters. */
86c7c4c44cSTonghao Zhang 	struct dp_meter_table meter_tbl;
87eac87c41SEelco Chaudron 
88eac87c41SEelco Chaudron 	/* re-balance flow masks timer */
89eac87c41SEelco Chaudron 	struct delayed_work masks_rebalance;
90ccb1352eSJesse Gross };
91ccb1352eSJesse Gross 
92ccb1352eSJesse Gross /**
93ccb1352eSJesse Gross  * struct ovs_skb_cb - OVS data in skb CB
9483c8df26SPravin B Shelar  * @input_vport: The original vport packet came in on. This value is cached
9583c8df26SPravin B Shelar  * when a packet is received by OVS.
967f8a436eSJoe Stringer  * @mru: The maximum received fragement size; 0 if the packet is not
977f8a436eSJoe Stringer  * fragmented.
98494bea39SLiping Zhang  * @acts_origlen: The netlink size of the flow actions applied to this skb.
9952427fa0SDaniel Axtens  * @cutlen: The number of bytes from the packet end to be removed.
100ccb1352eSJesse Gross  */
101ccb1352eSJesse Gross struct ovs_skb_cb {
10283c8df26SPravin B Shelar 	struct vport		*input_vport;
1037f8a436eSJoe Stringer 	u16			mru;
104494bea39SLiping Zhang 	u16			acts_origlen;
105f2a4d086SWilliam Tu 	u32			cutlen;
106ccb1352eSJesse Gross };
107ccb1352eSJesse Gross #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
108ccb1352eSJesse Gross 
109ccb1352eSJesse Gross /**
110ccb1352eSJesse Gross  * struct dp_upcall - metadata to include with a packet to send to userspace
111ccb1352eSJesse Gross  * @cmd: One of %OVS_PACKET_CMD_*.
1124490108bSBen Pfaff  * @userdata: If nonnull, its variable-length value is passed to userspace as
113ccb1352eSJesse Gross  * %OVS_PACKET_ATTR_USERDATA.
114e8eedb85SPravin B Shelar  * @portid: Netlink portid to which packet should be sent.  If @portid is 0
115e8eedb85SPravin B Shelar  * then no packet is sent and the packet is accounted in the datapath's @n_lost
116ccb1352eSJesse Gross  * counter.
1178f0aad6fSWenyu Zhang  * @egress_tun_info: If nonnull, becomes %OVS_PACKET_ATTR_EGRESS_TUN_KEY.
1187f8a436eSJoe Stringer  * @mru: If not zero, Maximum received IP fragment size.
119ccb1352eSJesse Gross  */
120ccb1352eSJesse Gross struct dp_upcall_info {
1214c222798SPravin B Shelar 	struct ip_tunnel_info *egress_tun_info;
122ccb1352eSJesse Gross 	const struct nlattr *userdata;
123ccea7445SNeil McKee 	const struct nlattr *actions;
124ccea7445SNeil McKee 	int actions_len;
12515e47304SEric W. Biederman 	u32 portid;
126e8eedb85SPravin B Shelar 	u8 cmd;
1277f8a436eSJoe Stringer 	u16 mru;
128ccb1352eSJesse Gross };
129ccb1352eSJesse Gross 
1308e4e1713SPravin B Shelar /**
1318e4e1713SPravin B Shelar  * struct ovs_net - Per net-namespace data for ovs.
1328e4e1713SPravin B Shelar  * @dps: List of datapaths to enable dumping them all out.
1338e4e1713SPravin B Shelar  * Protected by genl_mutex.
1348e4e1713SPravin B Shelar  */
1358e4e1713SPravin B Shelar struct ovs_net {
1368e4e1713SPravin B Shelar 	struct list_head dps;
1378e4e1713SPravin B Shelar 	struct work_struct dp_notify_work;
13811efd5cbSYi-Hung Wei #if	IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
13911efd5cbSYi-Hung Wei 	struct ovs_ct_limit_info *ct_limit_info;
14011efd5cbSYi-Hung Wei #endif
141c2ac6673SJoe Stringer 
142c2ac6673SJoe Stringer 	/* Module reference for configuring conntrack. */
143c2ac6673SJoe Stringer 	bool xt_label;
1448e4e1713SPravin B Shelar };
1458e4e1713SPravin B Shelar 
/**
 * enum ovs_pkt_hash_types - hash info to include with a packet
 * to send to userspace.
 * @OVS_PACKET_HASH_SW_BIT: indicates hash was computed in software stack.
 * @OVS_PACKET_HASH_L4_BIT: indicates hash is a canonical 4-tuple hash
 * over transport ports.
 *
 * Both flags sit above the low 32 bits (bits 32 and 33), so the enumerators
 * do not fit in an int; this relies on the compiler accepting wide
 * enumerators (a GNU C extension before C23).
 */
enum ovs_pkt_hash_types {
	OVS_PACKET_HASH_SW_BIT = (1ULL << 32),
	OVS_PACKET_HASH_L4_BIT = (1ULL << 33),
};
157bd1903b7STonghao Zhang 
/* Defined outside this header.  NOTE(review): presumably the pernet id used
 * to find struct ovs_net; confirm in datapath.c.
 */
extern unsigned int ovs_net_id;

/* Acquire / release the OVS lock (the lock lockdep_ovsl_is_held() reports
 * on -- NOTE(review): confirm in datapath.c).
 */
void ovs_lock(void);
void ovs_unlock(void);
1618e4e1713SPravin B Shelar 
/*
 * Lock-assertion helpers.
 *
 * With CONFIG_LOCKDEP, lockdep_ovsl_is_held() is a real function defined
 * elsewhere.  Without it the macro below is the constant 1, so ASSERT_OVSL()
 * never warns and the conditions handed to the RCU dereference helpers are
 * always true.
 */
#ifdef CONFIG_LOCKDEP
int lockdep_ovsl_is_held(void);
#else
#define lockdep_ovsl_is_held()	1
#endif

/* Warn when lockdep_ovsl_is_held() reports that the OVS lock is not held. */
#define ASSERT_OVSL()		WARN_ON(!lockdep_ovsl_is_held())
/* Dereference @p via rcu_dereference_protected(), conditioned on the OVS lock. */
#define ovsl_dereference(p)					\
	rcu_dereference_protected(p, lockdep_ovsl_is_held())
/* Dereference @p via rcu_dereference_check(), conditioned on the OVS lock. */
#define rcu_dereference_ovsl(p)					\
	rcu_dereference_check(p, lockdep_ovsl_is_held())
1738e4e1713SPravin B Shelar 
17412eb18f7SThomas Graf static inline struct net *ovs_dp_get_net(const struct datapath *dp)
17546df7b81SPravin B Shelar {
17646df7b81SPravin B Shelar 	return read_pnet(&dp->net);
17746df7b81SPravin B Shelar }
17846df7b81SPravin B Shelar 
17946df7b81SPravin B Shelar static inline void ovs_dp_set_net(struct datapath *dp, struct net *net)
18046df7b81SPravin B Shelar {
18146df7b81SPravin B Shelar 	write_pnet(&dp->net, net);
18246df7b81SPravin B Shelar }
18346df7b81SPravin B Shelar 
1848e4e1713SPravin B Shelar struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no);
1858e4e1713SPravin B Shelar 
/**
 * ovs_vport_rcu - vport lookup that checks for an RCU read-side section
 * @dp: datapath to search.
 * @port_no: port number; implicitly narrowed to u16 when passed on to
 * ovs_lookup_vport().
 *
 * Warns once if rcu_read_lock_held() is false; the lookup is performed
 * either way.
 *
 * Return: the result of ovs_lookup_vport().
 */
static inline struct vport *ovs_vport_rcu(const struct datapath *dp, int port_no)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	return ovs_lookup_vport(dp, port_no);
}
1918e4e1713SPravin B Shelar 
/**
 * ovs_vport_ovsl_rcu - vport lookup that checks for RCU or the OVS lock
 * @dp: datapath to search.
 * @port_no: port number; implicitly narrowed to u16 when passed on to
 * ovs_lookup_vport().
 *
 * Warns once only when neither rcu_read_lock_held() nor
 * lockdep_ovsl_is_held() is true; the lookup is performed either way.
 *
 * Return: the result of ovs_lookup_vport().
 */
static inline struct vport *ovs_vport_ovsl_rcu(const struct datapath *dp, int port_no)
{
	WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
	return ovs_lookup_vport(dp, port_no);
}
1978e4e1713SPravin B Shelar 
/**
 * ovs_vport_ovsl - vport lookup that checks for the OVS lock
 * @dp: datapath to search.
 * @port_no: port number; implicitly narrowed to u16 when passed on to
 * ovs_lookup_vport().
 *
 * ASSERT_OVSL() warns when lockdep_ovsl_is_held() is false; the lookup is
 * performed either way.
 *
 * Return: the result of ovs_lookup_vport().
 */
static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_no)
{
	ASSERT_OVSL();
	return ovs_lookup_vport(dp, port_no);
}
2038e4e1713SPravin B Shelar 
2049602c01eSAndy Zhou /* Must be called with rcu_read_lock. */
2059602c01eSAndy Zhou static inline struct datapath *get_dp_rcu(struct net *net, int dp_ifindex)
2069602c01eSAndy Zhou {
2079602c01eSAndy Zhou 	struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex);
2089602c01eSAndy Zhou 
2099602c01eSAndy Zhou 	if (dev) {
2109602c01eSAndy Zhou 		struct vport *vport = ovs_internal_dev_get_vport(dev);
2119602c01eSAndy Zhou 
2129602c01eSAndy Zhou 		if (vport)
2139602c01eSAndy Zhou 			return vport->dp;
2149602c01eSAndy Zhou 	}
2159602c01eSAndy Zhou 
2169602c01eSAndy Zhou 	return NULL;
2179602c01eSAndy Zhou }
2189602c01eSAndy Zhou 
/**
 * get_dp - map an interface index to its datapath
 * @net: network namespace in which to resolve @dp_ifindex.
 * @dp_ifindex: interface index of the datapath device.
 *
 * The caller must hold either ovs_mutex or rcu_read_lock to keep the
 * returned dp pointer valid.  A one-time warning fires when neither
 * rcu_read_lock_held() nor lockdep_ovsl_is_held() is true.
 *
 * Return: the result of get_dp_rcu(), which may be NULL.
 */
static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
{
	struct datapath *dp;

	WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
	/* The read-side section here only covers the lookup itself; the
	 * caller's own lock is what keeps dp valid after it ends.
	 */
	rcu_read_lock();
	dp = get_dp_rcu(net, dp_ifindex);
	rcu_read_unlock();

	return dp;
}
2339602c01eSAndy Zhou 
234ccb1352eSJesse Gross extern struct notifier_block ovs_dp_device_notifier;
23568eb5503SJohannes Berg extern struct genl_family dp_vport_genl_family;
236ccb1352eSJesse Gross 
23795a7233cSPaul Blakey DECLARE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
23895a7233cSPaul Blakey 
2398c8b1b83SPravin B Shelar void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key);
240ccb1352eSJesse Gross void ovs_dp_detach_port(struct vport *);
241ccb1352eSJesse Gross int ovs_dp_upcall(struct datapath *, struct sk_buff *,
242f2a4d086SWilliam Tu 		  const struct sw_flow_key *, const struct dp_upcall_info *,
243f2a4d086SWilliam Tu 		  uint32_t cutlen);
244ccb1352eSJesse Gross 
245971427f3SAndy Zhou const char *ovs_dp_name(const struct datapath *dp);
2469354d452SJiri Benc struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
2479354d452SJiri Benc 					 u32 portid, u32 seq, u8 cmd);
248ccb1352eSJesse Gross 
2492ff3e4e4SPravin B Shelar int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
25012eb18f7SThomas Graf 			const struct sw_flow_actions *, struct sw_flow_key *);
251971427f3SAndy Zhou 
2528e4e1713SPravin B Shelar void ovs_dp_notify_wq(struct work_struct *work);
25303f0d916SAndy Zhou 
254971427f3SAndy Zhou int action_fifos_init(void);
255971427f3SAndy Zhou void action_fifos_exit(void);
256971427f3SAndy Zhou 
/*
 * OVS_MASKED() yields OLD with the bits selected by MASK replaced by KEY.
 * 'KEY' must not have any bits set outside of the 'MASK'; stray bits are
 * OR-ed into the result unchanged.
 *
 * OVS_SET_MASKED() stores that value back into OLD.  OLD is evaluated twice,
 * so it must be an lvalue expression without side effects.
 */
#define OVS_MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK)))
#define OVS_SET_MASKED(OLD, KEY, MASK) ((OLD) = OVS_MASKED(OLD, KEY, MASK))
260be26b9a8SJoe Stringer 
/*
 * OVS_NLERR - rate-limited netlink error log.
 * @logging_allowed: expression; nothing is logged when it is false.
 * @fmt: printf-style format string literal ("netlink: " is prepended and a
 *       newline appended by string-literal concatenation).
 *
 * The message is emitted via pr_info() only when @logging_allowed is true
 * AND net_ratelimit() allows it.  @logging_allowed is parenthesized so that
 * a compound argument such as 'a || b' cannot re-associate with the '&&' and
 * skip the rate limit.
 */
#define OVS_NLERR(logging_allowed, fmt, ...)			\
do {								\
	if ((logging_allowed) && net_ratelimit())		\
		pr_info("netlink: " fmt "\n", ##__VA_ARGS__);	\
} while (0)
266ccb1352eSJesse Gross #endif /* datapath.h */
267