1c9422999SThomas Gleixner /* SPDX-License-Identifier: GPL-2.0-only */ 2ccb1352eSJesse Gross /* 3971427f3SAndy Zhou * Copyright (c) 2007-2014 Nicira, Inc. 4ccb1352eSJesse Gross */ 5ccb1352eSJesse Gross 6ccb1352eSJesse Gross #ifndef DATAPATH_H 7ccb1352eSJesse Gross #define DATAPATH_H 1 8ccb1352eSJesse Gross 9ccb1352eSJesse Gross #include <asm/page.h> 10ccb1352eSJesse Gross #include <linux/kernel.h> 11ccb1352eSJesse Gross #include <linux/mutex.h> 12ccb1352eSJesse Gross #include <linux/netdevice.h> 13ccb1352eSJesse Gross #include <linux/skbuff.h> 14ccb1352eSJesse Gross #include <linux/u64_stats_sync.h> 151d8fff90SThomas Graf #include <net/ip_tunnels.h> 16ccb1352eSJesse Gross 177f8a436eSJoe Stringer #include "conntrack.h" 18ccb1352eSJesse Gross #include "flow.h" 19e6445719SPravin B Shelar #include "flow_table.h" 20cd8a6c33SAndy Zhou #include "meter.h" 219602c01eSAndy Zhou #include "vport-internal_dev.h" 22ccb1352eSJesse Gross 2315eac2a7SPravin B Shelar #define DP_MAX_PORTS USHRT_MAX 2415eac2a7SPravin B Shelar #define DP_VPORT_HASH_BUCKETS 1024 25eac87c41SEelco Chaudron #define DP_MASKS_REBALANCE_INTERVAL 4000 2615eac2a7SPravin B Shelar 27ccb1352eSJesse Gross /** 28ccb1352eSJesse Gross * struct dp_stats_percpu - per-cpu packet processing statistics for a given 29ccb1352eSJesse Gross * datapath. 30ccb1352eSJesse Gross * @n_hit: Number of received packets for which a matching flow was found in 31ccb1352eSJesse Gross * the flow table. 32ccb1352eSJesse Gross * @n_miss: Number of received packets that had no matching flow in the flow 33ccb1352eSJesse Gross * table. The sum of @n_hit and @n_miss is the number of packets that have 34ccb1352eSJesse Gross * been received by the datapath. 35ccb1352eSJesse Gross * @n_lost: Number of received packets that had no matching flow in the flow 36ccb1352eSJesse Gross * table that could not be sent to userspace (normally due to an overflow in 37ccb1352eSJesse Gross * one of the datapath's queues). 381bd7116fSAndy Zhou * @n_mask_hit: Number of masks looked up for flow match. 391bd7116fSAndy Zhou * @n_mask_hit / (@n_hit + @n_missed) will be the average masks looked 401bd7116fSAndy Zhou * up per packet. 41ccb1352eSJesse Gross */ 42ccb1352eSJesse Gross struct dp_stats_percpu { 43ccb1352eSJesse Gross u64 n_hit; 44ccb1352eSJesse Gross u64 n_missed; 45ccb1352eSJesse Gross u64 n_lost; 461bd7116fSAndy Zhou u64 n_mask_hit; 47df9d9fdfSWANG Cong struct u64_stats_sync syncp; 48ccb1352eSJesse Gross }; 49ccb1352eSJesse Gross 50ccb1352eSJesse Gross /** 51ccb1352eSJesse Gross * struct datapath - datapath for flow-based packet switching 52ccb1352eSJesse Gross * @rcu: RCU callback head for deferred destruction. 53ccb1352eSJesse Gross * @list_node: Element in global 'dps' list. 54b637e498SPravin B Shelar * @table: flow table. 5515eac2a7SPravin B Shelar * @ports: Hash table for ports. %OVSP_LOCAL port always exists. Protected by 568e4e1713SPravin B Shelar * ovs_mutex and RCU. 57ccb1352eSJesse Gross * @stats_percpu: Per-CPU datapath statistics. 5846df7b81SPravin B Shelar * @net: Reference to net namespace. 593a927bc7SPaolo Abeni * @max_headroom: the maximum headroom of all vports in this datapath; it will 603a927bc7SPaolo Abeni * be used by all the internal vports in this dp. 61ccb1352eSJesse Gross * 62ccb1352eSJesse Gross * Context: See the comment on locking at the top of datapath.c for additional 63ccb1352eSJesse Gross * locking information. 64ccb1352eSJesse Gross */ 65ccb1352eSJesse Gross struct datapath { 66ccb1352eSJesse Gross struct rcu_head rcu; 67ccb1352eSJesse Gross struct list_head list_node; 68ccb1352eSJesse Gross 69ccb1352eSJesse Gross /* Flow table. */ 70b637e498SPravin B Shelar struct flow_table table; 71ccb1352eSJesse Gross 72ccb1352eSJesse Gross /* Switch ports. */ 7315eac2a7SPravin B Shelar struct hlist_head *ports; 74ccb1352eSJesse Gross 75ccb1352eSJesse Gross /* Stats. */ 76ccb1352eSJesse Gross struct dp_stats_percpu __percpu *stats_percpu; 7746df7b81SPravin B Shelar 7846df7b81SPravin B Shelar /* Network namespace ref. */ 790c5c9fb5SEric W. Biederman possible_net_t net; 8043d4be9cSThomas Graf 8143d4be9cSThomas Graf u32 user_features; 823a927bc7SPaolo Abeni 833a927bc7SPaolo Abeni u32 max_headroom; 8496fbc13dSAndy Zhou 8596fbc13dSAndy Zhou /* Switch meters. */ 86c7c4c44cSTonghao Zhang struct dp_meter_table meter_tbl; 87eac87c41SEelco Chaudron 88eac87c41SEelco Chaudron /* re-balance flow masks timer */ 89eac87c41SEelco Chaudron struct delayed_work masks_rebalance; 90ccb1352eSJesse Gross }; 91ccb1352eSJesse Gross 92ccb1352eSJesse Gross /** 93ccb1352eSJesse Gross * struct ovs_skb_cb - OVS data in skb CB 9483c8df26SPravin B Shelar * @input_vport: The original vport packet came in on. This value is cached 9583c8df26SPravin B Shelar * when a packet is received by OVS. 967f8a436eSJoe Stringer * @mru: The maximum received fragement size; 0 if the packet is not 977f8a436eSJoe Stringer * fragmented. 98494bea39SLiping Zhang * @acts_origlen: The netlink size of the flow actions applied to this skb. 9952427fa0SDaniel Axtens * @cutlen: The number of bytes from the packet end to be removed. 100ccb1352eSJesse Gross */ 101ccb1352eSJesse Gross struct ovs_skb_cb { 10283c8df26SPravin B Shelar struct vport *input_vport; 1037f8a436eSJoe Stringer u16 mru; 104494bea39SLiping Zhang u16 acts_origlen; 105f2a4d086SWilliam Tu u32 cutlen; 106ccb1352eSJesse Gross }; 107ccb1352eSJesse Gross #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb) 108ccb1352eSJesse Gross 109ccb1352eSJesse Gross /** 110ccb1352eSJesse Gross * struct dp_upcall - metadata to include with a packet to send to userspace 111ccb1352eSJesse Gross * @cmd: One of %OVS_PACKET_CMD_*. 1124490108bSBen Pfaff * @userdata: If nonnull, its variable-length value is passed to userspace as 113ccb1352eSJesse Gross * %OVS_PACKET_ATTR_USERDATA. 114e8eedb85SPravin B Shelar * @portid: Netlink portid to which packet should be sent. If @portid is 0 115e8eedb85SPravin B Shelar * then no packet is sent and the packet is accounted in the datapath's @n_lost 116ccb1352eSJesse Gross * counter. 1178f0aad6fSWenyu Zhang * @egress_tun_info: If nonnull, becomes %OVS_PACKET_ATTR_EGRESS_TUN_KEY. 1187f8a436eSJoe Stringer * @mru: If not zero, Maximum received IP fragment size. 119ccb1352eSJesse Gross */ 120ccb1352eSJesse Gross struct dp_upcall_info { 1214c222798SPravin B Shelar struct ip_tunnel_info *egress_tun_info; 122ccb1352eSJesse Gross const struct nlattr *userdata; 123ccea7445SNeil McKee const struct nlattr *actions; 124ccea7445SNeil McKee int actions_len; 12515e47304SEric W. Biederman u32 portid; 126e8eedb85SPravin B Shelar u8 cmd; 1277f8a436eSJoe Stringer u16 mru; 128ccb1352eSJesse Gross }; 129ccb1352eSJesse Gross 1308e4e1713SPravin B Shelar /** 1318e4e1713SPravin B Shelar * struct ovs_net - Per net-namespace data for ovs. 1328e4e1713SPravin B Shelar * @dps: List of datapaths to enable dumping them all out. 1338e4e1713SPravin B Shelar * Protected by genl_mutex. 1348e4e1713SPravin B Shelar */ 1358e4e1713SPravin B Shelar struct ovs_net { 1368e4e1713SPravin B Shelar struct list_head dps; 1378e4e1713SPravin B Shelar struct work_struct dp_notify_work; 13811efd5cbSYi-Hung Wei #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) 13911efd5cbSYi-Hung Wei struct ovs_ct_limit_info *ct_limit_info; 14011efd5cbSYi-Hung Wei #endif 141c2ac6673SJoe Stringer 142c2ac6673SJoe Stringer /* Module reference for configuring conntrack. */ 143c2ac6673SJoe Stringer bool xt_label; 1448e4e1713SPravin B Shelar }; 1458e4e1713SPravin B Shelar 146bd1903b7STonghao Zhang /** 147bd1903b7STonghao Zhang * enum ovs_pkt_hash_types - hash info to include with a packet 148bd1903b7STonghao Zhang * to send to userspace. 149bd1903b7STonghao Zhang * @OVS_PACKET_HASH_SW_BIT: indicates hash was computed in software stack. 150bd1903b7STonghao Zhang * @OVS_PACKET_HASH_L4_BIT: indicates hash is a canonical 4-tuple hash 151bd1903b7STonghao Zhang * over transport ports. 152bd1903b7STonghao Zhang */ 153bd1903b7STonghao Zhang enum ovs_pkt_hash_types { 154bd1903b7STonghao Zhang OVS_PACKET_HASH_SW_BIT = (1ULL << 32), 155bd1903b7STonghao Zhang OVS_PACKET_HASH_L4_BIT = (1ULL << 33), 156bd1903b7STonghao Zhang }; 157bd1903b7STonghao Zhang 158c7d03a00SAlexey Dobriyan extern unsigned int ovs_net_id; 1598e4e1713SPravin B Shelar void ovs_lock(void); 1608e4e1713SPravin B Shelar void ovs_unlock(void); 1618e4e1713SPravin B Shelar 1628e4e1713SPravin B Shelar #ifdef CONFIG_LOCKDEP 1638e4e1713SPravin B Shelar int lockdep_ovsl_is_held(void); 1648e4e1713SPravin B Shelar #else 1658e4e1713SPravin B Shelar #define lockdep_ovsl_is_held() 1 1668e4e1713SPravin B Shelar #endif 1678e4e1713SPravin B Shelar 16880019d31SThomas Graf #define ASSERT_OVSL() WARN_ON(!lockdep_ovsl_is_held()) 1698e4e1713SPravin B Shelar #define ovsl_dereference(p) \ 1708e4e1713SPravin B Shelar rcu_dereference_protected(p, lockdep_ovsl_is_held()) 171663efa36SJesse Gross #define rcu_dereference_ovsl(p) \ 172663efa36SJesse Gross rcu_dereference_check(p, lockdep_ovsl_is_held()) 1738e4e1713SPravin B Shelar 17412eb18f7SThomas Graf static inline struct net *ovs_dp_get_net(const struct datapath *dp) 17546df7b81SPravin B Shelar { 17646df7b81SPravin B Shelar return read_pnet(&dp->net); 17746df7b81SPravin B Shelar } 17846df7b81SPravin B Shelar 17946df7b81SPravin B Shelar static inline void ovs_dp_set_net(struct datapath *dp, struct net *net) 18046df7b81SPravin B Shelar { 18146df7b81SPravin B Shelar write_pnet(&dp->net, net); 18246df7b81SPravin B Shelar } 18346df7b81SPravin B Shelar 1848e4e1713SPravin B Shelar struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no); 1858e4e1713SPravin B Shelar 1868e4e1713SPravin B Shelar static inline struct vport *ovs_vport_rcu(const struct datapath *dp, int port_no) 1878e4e1713SPravin B Shelar { 1888e4e1713SPravin B Shelar WARN_ON_ONCE(!rcu_read_lock_held()); 1898e4e1713SPravin B Shelar return ovs_lookup_vport(dp, port_no); 1908e4e1713SPravin B Shelar } 1918e4e1713SPravin B Shelar 1928e4e1713SPravin B Shelar static inline struct vport *ovs_vport_ovsl_rcu(const struct datapath *dp, int port_no) 1938e4e1713SPravin B Shelar { 1948e4e1713SPravin B Shelar WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held()); 1958e4e1713SPravin B Shelar return ovs_lookup_vport(dp, port_no); 1968e4e1713SPravin B Shelar } 1978e4e1713SPravin B Shelar 1988e4e1713SPravin B Shelar static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_no) 1998e4e1713SPravin B Shelar { 2008e4e1713SPravin B Shelar ASSERT_OVSL(); 2018e4e1713SPravin B Shelar return ovs_lookup_vport(dp, port_no); 2028e4e1713SPravin B Shelar } 2038e4e1713SPravin B Shelar 2049602c01eSAndy Zhou /* Must be called with rcu_read_lock. */ 2059602c01eSAndy Zhou static inline struct datapath *get_dp_rcu(struct net *net, int dp_ifindex) 2069602c01eSAndy Zhou { 2079602c01eSAndy Zhou struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex); 2089602c01eSAndy Zhou 2099602c01eSAndy Zhou if (dev) { 2109602c01eSAndy Zhou struct vport *vport = ovs_internal_dev_get_vport(dev); 2119602c01eSAndy Zhou 2129602c01eSAndy Zhou if (vport) 2139602c01eSAndy Zhou return vport->dp; 2149602c01eSAndy Zhou } 2159602c01eSAndy Zhou 2169602c01eSAndy Zhou return NULL; 2179602c01eSAndy Zhou } 2189602c01eSAndy Zhou 2199602c01eSAndy Zhou /* The caller must hold either ovs_mutex or rcu_read_lock to keep the 2209602c01eSAndy Zhou * returned dp pointer valid. 2219602c01eSAndy Zhou */ 2229602c01eSAndy Zhou static inline struct datapath *get_dp(struct net *net, int dp_ifindex) 2239602c01eSAndy Zhou { 2249602c01eSAndy Zhou struct datapath *dp; 2259602c01eSAndy Zhou 2269602c01eSAndy Zhou WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held()); 2279602c01eSAndy Zhou rcu_read_lock(); 2289602c01eSAndy Zhou dp = get_dp_rcu(net, dp_ifindex); 2299602c01eSAndy Zhou rcu_read_unlock(); 2309602c01eSAndy Zhou 2319602c01eSAndy Zhou return dp; 2329602c01eSAndy Zhou } 2339602c01eSAndy Zhou 234ccb1352eSJesse Gross extern struct notifier_block ovs_dp_device_notifier; 23568eb5503SJohannes Berg extern struct genl_family dp_vport_genl_family; 236ccb1352eSJesse Gross 23795a7233cSPaul Blakey DECLARE_STATIC_KEY_FALSE(tc_recirc_sharing_support); 23895a7233cSPaul Blakey 2398c8b1b83SPravin B Shelar void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key); 240ccb1352eSJesse Gross void ovs_dp_detach_port(struct vport *); 241ccb1352eSJesse Gross int ovs_dp_upcall(struct datapath *, struct sk_buff *, 242f2a4d086SWilliam Tu const struct sw_flow_key *, const struct dp_upcall_info *, 243f2a4d086SWilliam Tu uint32_t cutlen); 244ccb1352eSJesse Gross 245971427f3SAndy Zhou const char *ovs_dp_name(const struct datapath *dp); 2469354d452SJiri Benc struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net, 2479354d452SJiri Benc u32 portid, u32 seq, u8 cmd); 248ccb1352eSJesse Gross 2492ff3e4e4SPravin B Shelar int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, 25012eb18f7SThomas Graf const struct sw_flow_actions *, struct sw_flow_key *); 251971427f3SAndy Zhou 2528e4e1713SPravin B Shelar void ovs_dp_notify_wq(struct work_struct *work); 25303f0d916SAndy Zhou 254971427f3SAndy Zhou int action_fifos_init(void); 255971427f3SAndy Zhou void action_fifos_exit(void); 256971427f3SAndy Zhou 257be26b9a8SJoe Stringer /* 'KEY' must not have any bits set outside of the 'MASK' */ 258be26b9a8SJoe Stringer #define OVS_MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK))) 259be26b9a8SJoe Stringer #define OVS_SET_MASKED(OLD, KEY, MASK) ((OLD) = OVS_MASKED(OLD, KEY, MASK)) 260be26b9a8SJoe Stringer 26105da5898SJarno Rajahalme #define OVS_NLERR(logging_allowed, fmt, ...) \ 2621815a883SJoe Perches do { \ 26305da5898SJarno Rajahalme if (logging_allowed && net_ratelimit()) \ 26405da5898SJarno Rajahalme pr_info("netlink: " fmt "\n", ##__VA_ARGS__); \ 2651815a883SJoe Perches } while (0) 266ccb1352eSJesse Gross #endif /* datapath.h */ 267