1c9422999SThomas Gleixner /* SPDX-License-Identifier: GPL-2.0-only */
2ccb1352eSJesse Gross /*
3971427f3SAndy Zhou * Copyright (c) 2007-2014 Nicira, Inc.
4ccb1352eSJesse Gross */
5ccb1352eSJesse Gross
6ccb1352eSJesse Gross #ifndef DATAPATH_H
7ccb1352eSJesse Gross #define DATAPATH_H 1
8ccb1352eSJesse Gross
9ccb1352eSJesse Gross #include <asm/page.h>
10ccb1352eSJesse Gross #include <linux/kernel.h>
11ccb1352eSJesse Gross #include <linux/mutex.h>
12ccb1352eSJesse Gross #include <linux/netdevice.h>
13ccb1352eSJesse Gross #include <linux/skbuff.h>
14ccb1352eSJesse Gross #include <linux/u64_stats_sync.h>
151d8fff90SThomas Graf #include <net/ip_tunnels.h>
16ccb1352eSJesse Gross
177f8a436eSJoe Stringer #include "conntrack.h"
18ccb1352eSJesse Gross #include "flow.h"
19e6445719SPravin B Shelar #include "flow_table.h"
20cd8a6c33SAndy Zhou #include "meter.h"
219602c01eSAndy Zhou #include "vport-internal_dev.h"
22ccb1352eSJesse Gross
/* Port-count limit for a datapath; port numbers are looked up as u16. */
#define DP_MAX_PORTS			USHRT_MAX
/* Bucket count, by its name for the vport hash table (datapath.ports). */
#define DP_VPORT_HASH_BUCKETS		1024
/*
 * Interval between flow-mask rebalance runs.  The unit is not visible in
 * this header (presumably milliseconds -- confirm at the point of use).
 */
#define DP_MASKS_REBALANCE_INTERVAL	4000
2615eac2a7SPravin B Shelar
27ccb1352eSJesse Gross /**
28ccb1352eSJesse Gross * struct dp_stats_percpu - per-cpu packet processing statistics for a given
29ccb1352eSJesse Gross * datapath.
30ccb1352eSJesse Gross * @n_hit: Number of received packets for which a matching flow was found in
31ccb1352eSJesse Gross * the flow table.
32ccb1352eSJesse Gross * @n_miss: Number of received packets that had no matching flow in the flow
33ccb1352eSJesse Gross * table. The sum of @n_hit and @n_miss is the number of packets that have
34ccb1352eSJesse Gross * been received by the datapath.
35ccb1352eSJesse Gross * @n_lost: Number of received packets that had no matching flow in the flow
36ccb1352eSJesse Gross * table that could not be sent to userspace (normally due to an overflow in
37ccb1352eSJesse Gross * one of the datapath's queues).
381bd7116fSAndy Zhou * @n_mask_hit: Number of masks looked up for flow match.
391bd7116fSAndy Zhou * @n_mask_hit / (@n_hit + @n_missed) will be the average masks looked
401bd7116fSAndy Zhou * up per packet.
419d2f627bSEelco Chaudron * @n_cache_hit: The number of received packets that had their mask found using
429d2f627bSEelco Chaudron * the mask cache.
43ccb1352eSJesse Gross */
44ccb1352eSJesse Gross struct dp_stats_percpu {
45ccb1352eSJesse Gross u64 n_hit;
46ccb1352eSJesse Gross u64 n_missed;
47ccb1352eSJesse Gross u64 n_lost;
481bd7116fSAndy Zhou u64 n_mask_hit;
499d2f627bSEelco Chaudron u64 n_cache_hit;
50df9d9fdfSWANG Cong struct u64_stats_sync syncp;
51ccb1352eSJesse Gross };
52ccb1352eSJesse Gross
53ccb1352eSJesse Gross /**
54*b83d23a2SMark Gray * struct dp_nlsk_pids - array of netlink portids of for a datapath.
55*b83d23a2SMark Gray * This is used when OVS_DP_F_DISPATCH_UPCALL_PER_CPU
56*b83d23a2SMark Gray * is enabled and must be protected by rcu.
57*b83d23a2SMark Gray * @rcu: RCU callback head for deferred destruction.
58*b83d23a2SMark Gray * @n_pids: Size of @pids array.
59*b83d23a2SMark Gray * @pids: Array storing the Netlink socket PIDs indexed by CPU ID for packets
60*b83d23a2SMark Gray * that miss the flow table.
61*b83d23a2SMark Gray */
62*b83d23a2SMark Gray struct dp_nlsk_pids {
63*b83d23a2SMark Gray struct rcu_head rcu;
64*b83d23a2SMark Gray u32 n_pids;
65*b83d23a2SMark Gray u32 pids[];
66*b83d23a2SMark Gray };
67*b83d23a2SMark Gray
68*b83d23a2SMark Gray /**
69ccb1352eSJesse Gross * struct datapath - datapath for flow-based packet switching
70ccb1352eSJesse Gross * @rcu: RCU callback head for deferred destruction.
71ccb1352eSJesse Gross * @list_node: Element in global 'dps' list.
72b637e498SPravin B Shelar * @table: flow table.
7315eac2a7SPravin B Shelar * @ports: Hash table for ports. %OVSP_LOCAL port always exists. Protected by
748e4e1713SPravin B Shelar * ovs_mutex and RCU.
75ccb1352eSJesse Gross * @stats_percpu: Per-CPU datapath statistics.
7646df7b81SPravin B Shelar * @net: Reference to net namespace.
773a927bc7SPaolo Abeni * @max_headroom: the maximum headroom of all vports in this datapath; it will
783a927bc7SPaolo Abeni * be used by all the internal vports in this dp.
79*b83d23a2SMark Gray * @upcall_portids: RCU protected 'struct dp_nlsk_pids'.
80ccb1352eSJesse Gross *
81ccb1352eSJesse Gross * Context: See the comment on locking at the top of datapath.c for additional
82ccb1352eSJesse Gross * locking information.
83ccb1352eSJesse Gross */
84ccb1352eSJesse Gross struct datapath {
85ccb1352eSJesse Gross struct rcu_head rcu;
86ccb1352eSJesse Gross struct list_head list_node;
87ccb1352eSJesse Gross
88ccb1352eSJesse Gross /* Flow table. */
89b637e498SPravin B Shelar struct flow_table table;
90ccb1352eSJesse Gross
91ccb1352eSJesse Gross /* Switch ports. */
9215eac2a7SPravin B Shelar struct hlist_head *ports;
93ccb1352eSJesse Gross
94ccb1352eSJesse Gross /* Stats. */
95ccb1352eSJesse Gross struct dp_stats_percpu __percpu *stats_percpu;
9646df7b81SPravin B Shelar
9746df7b81SPravin B Shelar /* Network namespace ref. */
980c5c9fb5SEric W. Biederman possible_net_t net;
9943d4be9cSThomas Graf
10043d4be9cSThomas Graf u32 user_features;
1013a927bc7SPaolo Abeni
1023a927bc7SPaolo Abeni u32 max_headroom;
10396fbc13dSAndy Zhou
10496fbc13dSAndy Zhou /* Switch meters. */
105c7c4c44cSTonghao Zhang struct dp_meter_table meter_tbl;
106*b83d23a2SMark Gray
107*b83d23a2SMark Gray struct dp_nlsk_pids __rcu *upcall_portids;
108ccb1352eSJesse Gross };
109ccb1352eSJesse Gross
110ccb1352eSJesse Gross /**
111ccb1352eSJesse Gross * struct ovs_skb_cb - OVS data in skb CB
11283c8df26SPravin B Shelar * @input_vport: The original vport packet came in on. This value is cached
11383c8df26SPravin B Shelar * when a packet is received by OVS.
1147f8a436eSJoe Stringer * @mru: The maximum received fragement size; 0 if the packet is not
1157f8a436eSJoe Stringer * fragmented.
116494bea39SLiping Zhang * @acts_origlen: The netlink size of the flow actions applied to this skb.
11752427fa0SDaniel Axtens * @cutlen: The number of bytes from the packet end to be removed.
118ccb1352eSJesse Gross */
119ccb1352eSJesse Gross struct ovs_skb_cb {
12083c8df26SPravin B Shelar struct vport *input_vport;
1217f8a436eSJoe Stringer u16 mru;
122494bea39SLiping Zhang u16 acts_origlen;
123f2a4d086SWilliam Tu u32 cutlen;
124ccb1352eSJesse Gross };
125ccb1352eSJesse Gross #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
126ccb1352eSJesse Gross
127ccb1352eSJesse Gross /**
128ccb1352eSJesse Gross * struct dp_upcall - metadata to include with a packet to send to userspace
129ccb1352eSJesse Gross * @cmd: One of %OVS_PACKET_CMD_*.
1304490108bSBen Pfaff * @userdata: If nonnull, its variable-length value is passed to userspace as
131ccb1352eSJesse Gross * %OVS_PACKET_ATTR_USERDATA.
132e8eedb85SPravin B Shelar * @portid: Netlink portid to which packet should be sent. If @portid is 0
133e8eedb85SPravin B Shelar * then no packet is sent and the packet is accounted in the datapath's @n_lost
134ccb1352eSJesse Gross * counter.
1358f0aad6fSWenyu Zhang * @egress_tun_info: If nonnull, becomes %OVS_PACKET_ATTR_EGRESS_TUN_KEY.
1367f8a436eSJoe Stringer * @mru: If not zero, Maximum received IP fragment size.
137ccb1352eSJesse Gross */
138ccb1352eSJesse Gross struct dp_upcall_info {
1394c222798SPravin B Shelar struct ip_tunnel_info *egress_tun_info;
140ccb1352eSJesse Gross const struct nlattr *userdata;
141ccea7445SNeil McKee const struct nlattr *actions;
142ccea7445SNeil McKee int actions_len;
14315e47304SEric W. Biederman u32 portid;
144e8eedb85SPravin B Shelar u8 cmd;
1457f8a436eSJoe Stringer u16 mru;
146ccb1352eSJesse Gross };
147ccb1352eSJesse Gross
1488e4e1713SPravin B Shelar /**
1498e4e1713SPravin B Shelar * struct ovs_net - Per net-namespace data for ovs.
1508e4e1713SPravin B Shelar * @dps: List of datapaths to enable dumping them all out.
1518e4e1713SPravin B Shelar * Protected by genl_mutex.
1528e4e1713SPravin B Shelar */
1538e4e1713SPravin B Shelar struct ovs_net {
1548e4e1713SPravin B Shelar struct list_head dps;
1558e4e1713SPravin B Shelar struct work_struct dp_notify_work;
156a65878d6SEelco Chaudron struct delayed_work masks_rebalance;
15711efd5cbSYi-Hung Wei #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
15811efd5cbSYi-Hung Wei struct ovs_ct_limit_info *ct_limit_info;
15911efd5cbSYi-Hung Wei #endif
160c2ac6673SJoe Stringer
161c2ac6673SJoe Stringer /* Module reference for configuring conntrack. */
162c2ac6673SJoe Stringer bool xt_label;
1638e4e1713SPravin B Shelar };
1648e4e1713SPravin B Shelar
/**
 * enum ovs_pkt_hash_types - hash info to include with a packet
 * to send to userspace.
 * @OVS_PACKET_HASH_SW_BIT: indicates hash was computed in software stack.
 * @OVS_PACKET_HASH_L4_BIT: indicates hash is a canonical 4-tuple hash
 * over transport ports.
 *
 * Both flags sit above the low 32 bits (bit 32 and bit 33).
 */
enum ovs_pkt_hash_types {
	OVS_PACKET_HASH_SW_BIT = (1ULL << 32),	/* bit 32 */
	OVS_PACKET_HASH_L4_BIT = (1ULL << 33),	/* bit 33 */
};
176bd1903b7STonghao Zhang
/* Defined outside this header (presumably the pernet id for struct ovs_net). */
extern unsigned int ovs_net_id;

/* Acquire / release the OVS lock; implemented outside this header. */
void ovs_lock(void);
void ovs_unlock(void);

#ifndef CONFIG_LOCKDEP
/* Without lockdep the lock state cannot be queried, so always report "held". */
#define lockdep_ovsl_is_held()	1
#else
int lockdep_ovsl_is_held(void);
#endif

/* Warn when lockdep_ovsl_is_held() reports that the OVS lock is not held. */
#define ASSERT_OVSL()		WARN_ON(!lockdep_ovsl_is_held())

/* Dereference an RCU pointer on the update side, i.e. under the OVS lock. */
#define ovsl_dereference(p)					\
	rcu_dereference_protected(p, lockdep_ovsl_is_held())

/* Dereference an RCU pointer from either an RCU reader or an OVS lock holder. */
#define rcu_dereference_ovsl(p)					\
	rcu_dereference_check(p, lockdep_ovsl_is_held())
1928e4e1713SPravin B Shelar
ovs_dp_get_net(const struct datapath * dp)19312eb18f7SThomas Graf static inline struct net *ovs_dp_get_net(const struct datapath *dp)
19446df7b81SPravin B Shelar {
19546df7b81SPravin B Shelar return read_pnet(&dp->net);
19646df7b81SPravin B Shelar }
19746df7b81SPravin B Shelar
ovs_dp_set_net(struct datapath * dp,struct net * net)19846df7b81SPravin B Shelar static inline void ovs_dp_set_net(struct datapath *dp, struct net *net)
19946df7b81SPravin B Shelar {
20046df7b81SPravin B Shelar write_pnet(&dp->net, net);
20146df7b81SPravin B Shelar }
20246df7b81SPravin B Shelar
2038e4e1713SPravin B Shelar struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no);
2048e4e1713SPravin B Shelar
ovs_vport_rcu(const struct datapath * dp,int port_no)2058e4e1713SPravin B Shelar static inline struct vport *ovs_vport_rcu(const struct datapath *dp, int port_no)
2068e4e1713SPravin B Shelar {
2078e4e1713SPravin B Shelar WARN_ON_ONCE(!rcu_read_lock_held());
2088e4e1713SPravin B Shelar return ovs_lookup_vport(dp, port_no);
2098e4e1713SPravin B Shelar }
2108e4e1713SPravin B Shelar
/*
 * Look up @port_no in @dp through ovs_lookup_vport().  Warns once unless the
 * caller is in an RCU read-side critical section or lockdep_ovsl_is_held()
 * reports the OVS lock as held.
 */
static inline struct vport *ovs_vport_ovsl_rcu(const struct datapath *dp, int port_no)
{
	struct vport *vport;

	WARN_ON_ONCE(!(rcu_read_lock_held() || lockdep_ovsl_is_held()));
	vport = ovs_lookup_vport(dp, port_no);

	return vport;
}
2168e4e1713SPravin B Shelar
/*
 * Look up @port_no in @dp through ovs_lookup_vport().  ASSERT_OVSL() warns
 * if the OVS lock is not reported as held.
 */
static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_no)
{
	struct vport *vport;

	ASSERT_OVSL();
	vport = ovs_lookup_vport(dp, port_no);

	return vport;
}
2228e4e1713SPravin B Shelar
2239602c01eSAndy Zhou /* Must be called with rcu_read_lock. */
get_dp_rcu(struct net * net,int dp_ifindex)2249602c01eSAndy Zhou static inline struct datapath *get_dp_rcu(struct net *net, int dp_ifindex)
2259602c01eSAndy Zhou {
2269602c01eSAndy Zhou struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex);
2279602c01eSAndy Zhou
2289602c01eSAndy Zhou if (dev) {
2299602c01eSAndy Zhou struct vport *vport = ovs_internal_dev_get_vport(dev);
2309602c01eSAndy Zhou
2319602c01eSAndy Zhou if (vport)
2329602c01eSAndy Zhou return vport->dp;
2339602c01eSAndy Zhou }
2349602c01eSAndy Zhou
2359602c01eSAndy Zhou return NULL;
2369602c01eSAndy Zhou }
2379602c01eSAndy Zhou
/*
 * Resolve the interface index @dp_ifindex in @net to its datapath, or NULL.
 *
 * The caller must hold either ovs_mutex or rcu_read_lock to keep the
 * returned dp pointer valid.
 */
static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
{
	struct datapath *found;

	WARN_ON_ONCE(!(rcu_read_lock_held() || lockdep_ovsl_is_held()));

	/*
	 * get_dp_rcu() must run under rcu_read_lock, which a caller holding
	 * only ovs_mutex does not provide, so take it for the lookup itself.
	 */
	rcu_read_lock();
	found = get_dp_rcu(net, dp_ifindex);
	rcu_read_unlock();

	return found;
}
2529602c01eSAndy Zhou
253ccb1352eSJesse Gross extern struct notifier_block ovs_dp_device_notifier;
25468eb5503SJohannes Berg extern struct genl_family dp_vport_genl_family;
255ccb1352eSJesse Gross
2568c8b1b83SPravin B Shelar void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key);
257ccb1352eSJesse Gross void ovs_dp_detach_port(struct vport *);
258ccb1352eSJesse Gross int ovs_dp_upcall(struct datapath *, struct sk_buff *,
259f2a4d086SWilliam Tu const struct sw_flow_key *, const struct dp_upcall_info *,
260f2a4d086SWilliam Tu uint32_t cutlen);
261ccb1352eSJesse Gross
262*b83d23a2SMark Gray u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id);
263*b83d23a2SMark Gray
264971427f3SAndy Zhou const char *ovs_dp_name(const struct datapath *dp);
2659354d452SJiri Benc struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
2669354d452SJiri Benc u32 portid, u32 seq, u8 cmd);
267ccb1352eSJesse Gross
2682ff3e4e4SPravin B Shelar int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
26912eb18f7SThomas Graf const struct sw_flow_actions *, struct sw_flow_key *);
270971427f3SAndy Zhou
2718e4e1713SPravin B Shelar void ovs_dp_notify_wq(struct work_struct *work);
27203f0d916SAndy Zhou
273971427f3SAndy Zhou int action_fifos_init(void);
274971427f3SAndy Zhou void action_fifos_exit(void);
275971427f3SAndy Zhou
/*
 * OVS_MASKED() yields 'OLD' with the bits selected by 'MASK' replaced by 'KEY'.
 * OVS_SET_MASKED() stores that result back into 'OLD' (so 'OLD' must be an
 * lvalue and is evaluated twice).
 *
 * 'KEY' must not have any bits set outside of the 'MASK'
 */
#define OVS_MASKED(OLD, KEY, MASK) (((OLD) & ~(MASK)) | (KEY))
#define OVS_SET_MASKED(OLD, KEY, MASK) \
	((OLD) = OVS_MASKED(OLD, KEY, MASK))
279be26b9a8SJoe Stringer
/*
 * Log a rate-limited "netlink: ..." message through pr_info() when
 * 'logging_allowed' is true.
 *
 * 'logging_allowed' is parenthesized so that a compound argument such as
 * "a || b" is evaluated as a unit and cannot bind to the "&&" below, which
 * would skip the net_ratelimit() check.
 */
#define OVS_NLERR(logging_allowed, fmt, ...)			\
do {								\
	if ((logging_allowed) && net_ratelimit())		\
		pr_info("netlink: " fmt "\n", ##__VA_ARGS__);	\
} while (0)
285ccb1352eSJesse Gross #endif /* datapath.h */
286