15b497af4SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only 27f8a436eSJoe Stringer /* 37f8a436eSJoe Stringer * Copyright (c) 2015 Nicira, Inc. 47f8a436eSJoe Stringer */ 57f8a436eSJoe Stringer 67f8a436eSJoe Stringer #include <linux/module.h> 77f8a436eSJoe Stringer #include <linux/openvswitch.h> 805752523SJarno Rajahalme #include <linux/tcp.h> 905752523SJarno Rajahalme #include <linux/udp.h> 1005752523SJarno Rajahalme #include <linux/sctp.h> 1111efd5cbSYi-Hung Wei #include <linux/static_key.h> 127f8a436eSJoe Stringer #include <net/ip.h> 1311efd5cbSYi-Hung Wei #include <net/genetlink.h> 147f8a436eSJoe Stringer #include <net/netfilter/nf_conntrack_core.h> 1511efd5cbSYi-Hung Wei #include <net/netfilter/nf_conntrack_count.h> 16cae3a262SJoe Stringer #include <net/netfilter/nf_conntrack_helper.h> 17c2ac6673SJoe Stringer #include <net/netfilter/nf_conntrack_labels.h> 1805752523SJarno Rajahalme #include <net/netfilter/nf_conntrack_seqadj.h> 1906bd2bdfSYi-Hung Wei #include <net/netfilter/nf_conntrack_timeout.h> 207f8a436eSJoe Stringer #include <net/netfilter/nf_conntrack_zones.h> 217f8a436eSJoe Stringer #include <net/netfilter/ipv6/nf_defrag_ipv6.h> 2270b095c8SFlorian Westphal #include <net/ipv6_frag.h> 237f8a436eSJoe Stringer 244806e975SFlorian Westphal #if IS_ENABLED(CONFIG_NF_NAT) 25d2c5c103SFlorian Westphal #include <net/netfilter/nf_nat.h> 2605752523SJarno Rajahalme #endif 2705752523SJarno Rajahalme 28b702436aSPaul Blakey #include <net/netfilter/nf_conntrack_act_ct.h> 29b702436aSPaul Blakey 307f8a436eSJoe Stringer #include "datapath.h" 317f8a436eSJoe Stringer #include "conntrack.h" 327f8a436eSJoe Stringer #include "flow.h" 337f8a436eSJoe Stringer #include "flow_netlink.h" 347f8a436eSJoe Stringer 357f8a436eSJoe Stringer struct ovs_ct_len_tbl { 3605752523SJarno Rajahalme int maxlen; 3705752523SJarno Rajahalme int minlen; 387f8a436eSJoe Stringer }; 397f8a436eSJoe Stringer 40182e3042SJoe Stringer /* Metadata mark for masked write to conntrack mark */ 41182e3042SJoe Stringer struct md_mark { 42182e3042SJoe Stringer u32 value; 43182e3042SJoe Stringer u32 mask; 44182e3042SJoe Stringer }; 45182e3042SJoe Stringer 46c2ac6673SJoe Stringer /* Metadata label for masked write to conntrack label. */ 4733db4125SJoe Stringer struct md_labels { 4833db4125SJoe Stringer struct ovs_key_ct_labels value; 4933db4125SJoe Stringer struct ovs_key_ct_labels mask; 50c2ac6673SJoe Stringer }; 51c2ac6673SJoe Stringer 5205752523SJarno Rajahalme enum ovs_ct_nat { 5305752523SJarno Rajahalme OVS_CT_NAT = 1 << 0, /* NAT for committed connections only. */ 5405752523SJarno Rajahalme OVS_CT_SRC_NAT = 1 << 1, /* Source NAT for NEW connections. */ 5505752523SJarno Rajahalme OVS_CT_DST_NAT = 1 << 2, /* Destination NAT for NEW connections. */ 5605752523SJarno Rajahalme }; 5705752523SJarno Rajahalme 587f8a436eSJoe Stringer /* Conntrack action context for execution. */ 597f8a436eSJoe Stringer struct ovs_conntrack_info { 60cae3a262SJoe Stringer struct nf_conntrack_helper *helper; 617f8a436eSJoe Stringer struct nf_conntrack_zone zone; 627f8a436eSJoe Stringer struct nf_conn *ct; 63ab38a7b5SJoe Stringer u8 commit : 1; 6405752523SJarno Rajahalme u8 nat : 3; /* enum ovs_ct_nat */ 65dd41d33fSJarno Rajahalme u8 force : 1; 6612064551SJarno Rajahalme u8 have_eventmask : 1; 677f8a436eSJoe Stringer u16 family; 6812064551SJarno Rajahalme u32 eventmask; /* Mask of 1 << IPCT_*. */ 69182e3042SJoe Stringer struct md_mark mark; 7033db4125SJoe Stringer struct md_labels labels; 7106bd2bdfSYi-Hung Wei char timeout[CTNL_TIMEOUT_NAME_MAX]; 7271778951SYi-Hung Wei struct nf_ct_timeout *nf_ct_timeout; 734806e975SFlorian Westphal #if IS_ENABLED(CONFIG_NF_NAT) 742eb0f624SThierry Du Tre struct nf_nat_range2 range; /* Only present for SRC NAT and DST NAT. */ 7505752523SJarno Rajahalme #endif 767f8a436eSJoe Stringer }; 777f8a436eSJoe Stringer 7811efd5cbSYi-Hung Wei #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) 7911efd5cbSYi-Hung Wei #define OVS_CT_LIMIT_UNLIMITED 0 8011efd5cbSYi-Hung Wei #define OVS_CT_LIMIT_DEFAULT OVS_CT_LIMIT_UNLIMITED 8111efd5cbSYi-Hung Wei #define CT_LIMIT_HASH_BUCKETS 512 8211efd5cbSYi-Hung Wei static DEFINE_STATIC_KEY_FALSE(ovs_ct_limit_enabled); 8311efd5cbSYi-Hung Wei 8411efd5cbSYi-Hung Wei struct ovs_ct_limit { 8511efd5cbSYi-Hung Wei /* Elements in ovs_ct_limit_info->limits hash table */ 8611efd5cbSYi-Hung Wei struct hlist_node hlist_node; 8711efd5cbSYi-Hung Wei struct rcu_head rcu; 8811efd5cbSYi-Hung Wei u16 zone; 8911efd5cbSYi-Hung Wei u32 limit; 9011efd5cbSYi-Hung Wei }; 9111efd5cbSYi-Hung Wei 9211efd5cbSYi-Hung Wei struct ovs_ct_limit_info { 9311efd5cbSYi-Hung Wei u32 default_limit; 9411efd5cbSYi-Hung Wei struct hlist_head *limits; 9511efd5cbSYi-Hung Wei struct nf_conncount_data *data; 9611efd5cbSYi-Hung Wei }; 9711efd5cbSYi-Hung Wei 9811efd5cbSYi-Hung Wei static const struct nla_policy ct_limit_policy[OVS_CT_LIMIT_ATTR_MAX + 1] = { 9911efd5cbSYi-Hung Wei [OVS_CT_LIMIT_ATTR_ZONE_LIMIT] = { .type = NLA_NESTED, }, 10011efd5cbSYi-Hung Wei }; 10111efd5cbSYi-Hung Wei #endif 10211efd5cbSYi-Hung Wei 10309aa98adSJarno Rajahalme static bool labels_nonzero(const struct ovs_key_ct_labels *labels); 10409aa98adSJarno Rajahalme 1052f3ab9f9SJoe Stringer static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info); 1062f3ab9f9SJoe Stringer 1077f8a436eSJoe Stringer static u16 key_to_nfproto(const struct sw_flow_key *key) 1087f8a436eSJoe Stringer { 1097f8a436eSJoe Stringer switch (ntohs(key->eth.type)) { 1107f8a436eSJoe Stringer case ETH_P_IP: 1117f8a436eSJoe Stringer return NFPROTO_IPV4; 1127f8a436eSJoe Stringer case ETH_P_IPV6: 1137f8a436eSJoe Stringer return NFPROTO_IPV6; 1147f8a436eSJoe Stringer default: 1157f8a436eSJoe Stringer return NFPROTO_UNSPEC; 1167f8a436eSJoe Stringer } 1177f8a436eSJoe Stringer } 1187f8a436eSJoe Stringer 1197f8a436eSJoe Stringer /* Map SKB connection state into the values used by flow definition. */ 1207f8a436eSJoe Stringer static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo) 1217f8a436eSJoe Stringer { 1227f8a436eSJoe Stringer u8 ct_state = OVS_CS_F_TRACKED; 1237f8a436eSJoe Stringer 1247f8a436eSJoe Stringer switch (ctinfo) { 1257f8a436eSJoe Stringer case IP_CT_ESTABLISHED_REPLY: 1267f8a436eSJoe Stringer case IP_CT_RELATED_REPLY: 1277f8a436eSJoe Stringer ct_state |= OVS_CS_F_REPLY_DIR; 1287f8a436eSJoe Stringer break; 1297f8a436eSJoe Stringer default: 1307f8a436eSJoe Stringer break; 1317f8a436eSJoe Stringer } 1327f8a436eSJoe Stringer 1337f8a436eSJoe Stringer switch (ctinfo) { 1347f8a436eSJoe Stringer case IP_CT_ESTABLISHED: 1357f8a436eSJoe Stringer case IP_CT_ESTABLISHED_REPLY: 1367f8a436eSJoe Stringer ct_state |= OVS_CS_F_ESTABLISHED; 1377f8a436eSJoe Stringer break; 1387f8a436eSJoe Stringer case IP_CT_RELATED: 1397f8a436eSJoe Stringer case IP_CT_RELATED_REPLY: 1407f8a436eSJoe Stringer ct_state |= OVS_CS_F_RELATED; 1417f8a436eSJoe Stringer break; 1427f8a436eSJoe Stringer case IP_CT_NEW: 1437f8a436eSJoe Stringer ct_state |= OVS_CS_F_NEW; 1447f8a436eSJoe Stringer break; 1457f8a436eSJoe Stringer default: 1467f8a436eSJoe Stringer break; 1477f8a436eSJoe Stringer } 1487f8a436eSJoe Stringer 1497f8a436eSJoe Stringer return ct_state; 1507f8a436eSJoe Stringer } 1517f8a436eSJoe Stringer 1520d5cdef8SJoe Stringer static u32 ovs_ct_get_mark(const struct nf_conn *ct) 1530d5cdef8SJoe Stringer { 1540d5cdef8SJoe Stringer #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) 1550d5cdef8SJoe Stringer return ct ? ct->mark : 0; 1560d5cdef8SJoe Stringer #else 1570d5cdef8SJoe Stringer return 0; 1580d5cdef8SJoe Stringer #endif 1590d5cdef8SJoe Stringer } 1600d5cdef8SJoe Stringer 161b87cec38SJarno Rajahalme /* Guard against conntrack labels max size shrinking below 128 bits. */ 162b87cec38SJarno Rajahalme #if NF_CT_LABELS_MAX_SIZE < 16 163b87cec38SJarno Rajahalme #error NF_CT_LABELS_MAX_SIZE must be at least 16 bytes 164b87cec38SJarno Rajahalme #endif 165b87cec38SJarno Rajahalme 16633db4125SJoe Stringer static void ovs_ct_get_labels(const struct nf_conn *ct, 16733db4125SJoe Stringer struct ovs_key_ct_labels *labels) 168c2ac6673SJoe Stringer { 169c2ac6673SJoe Stringer struct nf_conn_labels *cl = ct ? nf_ct_labels_find(ct) : NULL; 170c2ac6673SJoe Stringer 171b87cec38SJarno Rajahalme if (cl) 172b87cec38SJarno Rajahalme memcpy(labels, cl->bits, OVS_CT_LABELS_LEN); 173b87cec38SJarno Rajahalme else 17433db4125SJoe Stringer memset(labels, 0, OVS_CT_LABELS_LEN); 175c2ac6673SJoe Stringer } 176c2ac6673SJoe Stringer 1779dd7f890SJarno Rajahalme static void __ovs_ct_update_key_orig_tp(struct sw_flow_key *key, 1789dd7f890SJarno Rajahalme const struct nf_conntrack_tuple *orig, 1799dd7f890SJarno Rajahalme u8 icmp_proto) 1809dd7f890SJarno Rajahalme { 181316d4d78SJarno Rajahalme key->ct_orig_proto = orig->dst.protonum; 1829dd7f890SJarno Rajahalme if (orig->dst.protonum == icmp_proto) { 1839dd7f890SJarno Rajahalme key->ct.orig_tp.src = htons(orig->dst.u.icmp.type); 1849dd7f890SJarno Rajahalme key->ct.orig_tp.dst = htons(orig->dst.u.icmp.code); 1859dd7f890SJarno Rajahalme } else { 1869dd7f890SJarno Rajahalme key->ct.orig_tp.src = orig->src.u.all; 1879dd7f890SJarno Rajahalme key->ct.orig_tp.dst = orig->dst.u.all; 1889dd7f890SJarno Rajahalme } 1899dd7f890SJarno Rajahalme } 1909dd7f890SJarno Rajahalme 1917f8a436eSJoe Stringer static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state, 192182e3042SJoe Stringer const struct nf_conntrack_zone *zone, 193182e3042SJoe Stringer const struct nf_conn *ct) 1947f8a436eSJoe Stringer { 195316d4d78SJarno Rajahalme key->ct_state = state; 196316d4d78SJarno Rajahalme key->ct_zone = zone->id; 1970d5cdef8SJoe Stringer key->ct.mark = ovs_ct_get_mark(ct); 19833db4125SJoe Stringer ovs_ct_get_labels(ct, &key->ct.labels); 1999dd7f890SJarno Rajahalme 2009dd7f890SJarno Rajahalme if (ct) { 2019dd7f890SJarno Rajahalme const struct nf_conntrack_tuple *orig; 2029dd7f890SJarno Rajahalme 2039dd7f890SJarno Rajahalme /* Use the master if we have one. */ 2049dd7f890SJarno Rajahalme if (ct->master) 2059dd7f890SJarno Rajahalme ct = ct->master; 2069dd7f890SJarno Rajahalme orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; 2079dd7f890SJarno Rajahalme 2089dd7f890SJarno Rajahalme /* IP version must match with the master connection. */ 2099dd7f890SJarno Rajahalme if (key->eth.type == htons(ETH_P_IP) && 2109dd7f890SJarno Rajahalme nf_ct_l3num(ct) == NFPROTO_IPV4) { 2119dd7f890SJarno Rajahalme key->ipv4.ct_orig.src = orig->src.u3.ip; 2129dd7f890SJarno Rajahalme key->ipv4.ct_orig.dst = orig->dst.u3.ip; 2139dd7f890SJarno Rajahalme __ovs_ct_update_key_orig_tp(key, orig, IPPROTO_ICMP); 2149dd7f890SJarno Rajahalme return; 2159dd7f890SJarno Rajahalme } else if (key->eth.type == htons(ETH_P_IPV6) && 2169dd7f890SJarno Rajahalme !sw_flow_key_is_nd(key) && 2179dd7f890SJarno Rajahalme nf_ct_l3num(ct) == NFPROTO_IPV6) { 2189dd7f890SJarno Rajahalme key->ipv6.ct_orig.src = orig->src.u3.in6; 2199dd7f890SJarno Rajahalme key->ipv6.ct_orig.dst = orig->dst.u3.in6; 2209dd7f890SJarno Rajahalme __ovs_ct_update_key_orig_tp(key, orig, NEXTHDR_ICMP); 2219dd7f890SJarno Rajahalme return; 2229dd7f890SJarno Rajahalme } 2239dd7f890SJarno Rajahalme } 224316d4d78SJarno Rajahalme /* Clear 'ct_orig_proto' to mark the non-existence of conntrack 2259dd7f890SJarno Rajahalme * original direction key fields. 2269dd7f890SJarno Rajahalme */ 227316d4d78SJarno Rajahalme key->ct_orig_proto = 0; 2287f8a436eSJoe Stringer } 2297f8a436eSJoe Stringer 2305e17da63SJarno Rajahalme /* Update 'key' based on skb->_nfct. If 'post_ct' is true, then OVS has 23105752523SJarno Rajahalme * previously sent the packet to conntrack via the ct action. If 23205752523SJarno Rajahalme * 'keep_nat_flags' is true, the existing NAT flags retained, else they are 23305752523SJarno Rajahalme * initialized from the connection status. 2347f8a436eSJoe Stringer */ 2357f8a436eSJoe Stringer static void ovs_ct_update_key(const struct sk_buff *skb, 236d110986cSJoe Stringer const struct ovs_conntrack_info *info, 23705752523SJarno Rajahalme struct sw_flow_key *key, bool post_ct, 23805752523SJarno Rajahalme bool keep_nat_flags) 2397f8a436eSJoe Stringer { 2407f8a436eSJoe Stringer const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt; 2417f8a436eSJoe Stringer enum ip_conntrack_info ctinfo; 2427f8a436eSJoe Stringer struct nf_conn *ct; 2437f8a436eSJoe Stringer u8 state = 0; 2447f8a436eSJoe Stringer 2457f8a436eSJoe Stringer ct = nf_ct_get(skb, &ctinfo); 2467f8a436eSJoe Stringer if (ct) { 2477f8a436eSJoe Stringer state = ovs_ct_get_state(ctinfo); 2489f13ded8SJarno Rajahalme /* All unconfirmed entries are NEW connections. */ 2494f0909eeSJoe Stringer if (!nf_ct_is_confirmed(ct)) 2504f0909eeSJoe Stringer state |= OVS_CS_F_NEW; 2519f13ded8SJarno Rajahalme /* OVS persists the related flag for the duration of the 2529f13ded8SJarno Rajahalme * connection. 2539f13ded8SJarno Rajahalme */ 2547f8a436eSJoe Stringer if (ct->master) 2557f8a436eSJoe Stringer state |= OVS_CS_F_RELATED; 25605752523SJarno Rajahalme if (keep_nat_flags) { 257316d4d78SJarno Rajahalme state |= key->ct_state & OVS_CS_F_NAT_MASK; 25805752523SJarno Rajahalme } else { 25905752523SJarno Rajahalme if (ct->status & IPS_SRC_NAT) 26005752523SJarno Rajahalme state |= OVS_CS_F_SRC_NAT; 26105752523SJarno Rajahalme if (ct->status & IPS_DST_NAT) 26205752523SJarno Rajahalme state |= OVS_CS_F_DST_NAT; 26305752523SJarno Rajahalme } 2647f8a436eSJoe Stringer zone = nf_ct_zone(ct); 2657f8a436eSJoe Stringer } else if (post_ct) { 2667f8a436eSJoe Stringer state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID; 267d110986cSJoe Stringer if (info) 268d110986cSJoe Stringer zone = &info->zone; 2697f8a436eSJoe Stringer } 270182e3042SJoe Stringer __ovs_ct_update_key(key, state, zone, ct); 2717f8a436eSJoe Stringer } 2727f8a436eSJoe Stringer 2739f13ded8SJarno Rajahalme /* This is called to initialize CT key fields possibly coming in from the local 2749f13ded8SJarno Rajahalme * stack. 2759f13ded8SJarno Rajahalme */ 276d29334c1Swenxu void ovs_ct_fill_key(const struct sk_buff *skb, 277d29334c1Swenxu struct sw_flow_key *key, 278d29334c1Swenxu bool post_ct) 2797f8a436eSJoe Stringer { 280d29334c1Swenxu ovs_ct_update_key(skb, NULL, key, post_ct, false); 2817f8a436eSJoe Stringer } 2827f8a436eSJoe Stringer 2839dd7f890SJarno Rajahalme int ovs_ct_put_key(const struct sw_flow_key *swkey, 2849dd7f890SJarno Rajahalme const struct sw_flow_key *output, struct sk_buff *skb) 2857f8a436eSJoe Stringer { 286316d4d78SJarno Rajahalme if (nla_put_u32(skb, OVS_KEY_ATTR_CT_STATE, output->ct_state)) 2877f8a436eSJoe Stringer return -EMSGSIZE; 2887f8a436eSJoe Stringer 2897f8a436eSJoe Stringer if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && 290316d4d78SJarno Rajahalme nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, output->ct_zone)) 2917f8a436eSJoe Stringer return -EMSGSIZE; 2927f8a436eSJoe Stringer 293182e3042SJoe Stringer if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && 2949dd7f890SJarno Rajahalme nla_put_u32(skb, OVS_KEY_ATTR_CT_MARK, output->ct.mark)) 295182e3042SJoe Stringer return -EMSGSIZE; 296182e3042SJoe Stringer 2979723e6abSValentin Rothberg if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && 2989dd7f890SJarno Rajahalme nla_put(skb, OVS_KEY_ATTR_CT_LABELS, sizeof(output->ct.labels), 2999dd7f890SJarno Rajahalme &output->ct.labels)) 300c2ac6673SJoe Stringer return -EMSGSIZE; 301c2ac6673SJoe Stringer 302316d4d78SJarno Rajahalme if (swkey->ct_orig_proto) { 3039dd7f890SJarno Rajahalme if (swkey->eth.type == htons(ETH_P_IP)) { 3049aba6c5bSPeilin Ye struct ovs_key_ct_tuple_ipv4 orig; 3059aba6c5bSPeilin Ye 3069aba6c5bSPeilin Ye memset(&orig, 0, sizeof(orig)); 3079aba6c5bSPeilin Ye orig.ipv4_src = output->ipv4.ct_orig.src; 3089aba6c5bSPeilin Ye orig.ipv4_dst = output->ipv4.ct_orig.dst; 3099aba6c5bSPeilin Ye orig.src_port = output->ct.orig_tp.src; 3109aba6c5bSPeilin Ye orig.dst_port = output->ct.orig_tp.dst; 3119aba6c5bSPeilin Ye orig.ipv4_proto = output->ct_orig_proto; 3129aba6c5bSPeilin Ye 3139dd7f890SJarno Rajahalme if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4, 3149dd7f890SJarno Rajahalme sizeof(orig), &orig)) 3159dd7f890SJarno Rajahalme return -EMSGSIZE; 3169dd7f890SJarno Rajahalme } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 3179aba6c5bSPeilin Ye struct ovs_key_ct_tuple_ipv6 orig; 3189aba6c5bSPeilin Ye 3199aba6c5bSPeilin Ye memset(&orig, 0, sizeof(orig)); 3209aba6c5bSPeilin Ye memcpy(orig.ipv6_src, output->ipv6.ct_orig.src.s6_addr32, 3219aba6c5bSPeilin Ye sizeof(orig.ipv6_src)); 3229aba6c5bSPeilin Ye memcpy(orig.ipv6_dst, output->ipv6.ct_orig.dst.s6_addr32, 3239aba6c5bSPeilin Ye sizeof(orig.ipv6_dst)); 3249aba6c5bSPeilin Ye orig.src_port = output->ct.orig_tp.src; 3259aba6c5bSPeilin Ye orig.dst_port = output->ct.orig_tp.dst; 3269aba6c5bSPeilin Ye orig.ipv6_proto = output->ct_orig_proto; 3279aba6c5bSPeilin Ye 3289dd7f890SJarno Rajahalme if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6, 3299dd7f890SJarno Rajahalme sizeof(orig), &orig)) 3309dd7f890SJarno Rajahalme return -EMSGSIZE; 3319dd7f890SJarno Rajahalme } 3329dd7f890SJarno Rajahalme } 3339dd7f890SJarno Rajahalme 334182e3042SJoe Stringer return 0; 335182e3042SJoe Stringer } 336182e3042SJoe Stringer 3376ffcea79SJarno Rajahalme static int ovs_ct_set_mark(struct nf_conn *ct, struct sw_flow_key *key, 338182e3042SJoe Stringer u32 ct_mark, u32 mask) 339182e3042SJoe Stringer { 3400d5cdef8SJoe Stringer #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) 341182e3042SJoe Stringer u32 new_mark; 342182e3042SJoe Stringer 343182e3042SJoe Stringer new_mark = ct_mark | (ct->mark & ~(mask)); 344182e3042SJoe Stringer if (ct->mark != new_mark) { 345182e3042SJoe Stringer ct->mark = new_mark; 346193e3096SJarno Rajahalme if (nf_ct_is_confirmed(ct)) 347182e3042SJoe Stringer nf_conntrack_event_cache(IPCT_MARK, ct); 348182e3042SJoe Stringer key->ct.mark = new_mark; 349182e3042SJoe Stringer } 350182e3042SJoe Stringer 3517f8a436eSJoe Stringer return 0; 3520d5cdef8SJoe Stringer #else 3530d5cdef8SJoe Stringer return -ENOTSUPP; 3540d5cdef8SJoe Stringer #endif 3557f8a436eSJoe Stringer } 3567f8a436eSJoe Stringer 3576ffcea79SJarno Rajahalme static struct nf_conn_labels *ovs_ct_get_conn_labels(struct nf_conn *ct) 358c2ac6673SJoe Stringer { 359c2ac6673SJoe Stringer struct nf_conn_labels *cl; 360c2ac6673SJoe Stringer 361c2ac6673SJoe Stringer cl = nf_ct_labels_find(ct); 362c2ac6673SJoe Stringer if (!cl) { 363c2ac6673SJoe Stringer nf_ct_labels_ext_add(ct); 364c2ac6673SJoe Stringer cl = nf_ct_labels_find(ct); 365c2ac6673SJoe Stringer } 3666ffcea79SJarno Rajahalme 3676ffcea79SJarno Rajahalme return cl; 3686ffcea79SJarno Rajahalme } 3696ffcea79SJarno Rajahalme 3706ffcea79SJarno Rajahalme /* Initialize labels for a new, yet to be committed conntrack entry. Note that 3716ffcea79SJarno Rajahalme * since the new connection is not yet confirmed, and thus no-one else has 3722317c6b5SJarno Rajahalme * access to it's labels, we simply write them over. 3736ffcea79SJarno Rajahalme */ 3746ffcea79SJarno Rajahalme static int ovs_ct_init_labels(struct nf_conn *ct, struct sw_flow_key *key, 3756ffcea79SJarno Rajahalme const struct ovs_key_ct_labels *labels, 3766ffcea79SJarno Rajahalme const struct ovs_key_ct_labels *mask) 3776ffcea79SJarno Rajahalme { 37809aa98adSJarno Rajahalme struct nf_conn_labels *cl, *master_cl; 37909aa98adSJarno Rajahalme bool have_mask = labels_nonzero(mask); 38009aa98adSJarno Rajahalme 38109aa98adSJarno Rajahalme /* Inherit master's labels to the related connection? */ 38209aa98adSJarno Rajahalme master_cl = ct->master ? nf_ct_labels_find(ct->master) : NULL; 38309aa98adSJarno Rajahalme 38409aa98adSJarno Rajahalme if (!master_cl && !have_mask) 38509aa98adSJarno Rajahalme return 0; /* Nothing to do. */ 3866ffcea79SJarno Rajahalme 3876ffcea79SJarno Rajahalme cl = ovs_ct_get_conn_labels(ct); 388b87cec38SJarno Rajahalme if (!cl) 389c2ac6673SJoe Stringer return -ENOSPC; 390c2ac6673SJoe Stringer 39109aa98adSJarno Rajahalme /* Inherit the master's labels, if any. */ 39209aa98adSJarno Rajahalme if (master_cl) 39309aa98adSJarno Rajahalme *cl = *master_cl; 39409aa98adSJarno Rajahalme 39509aa98adSJarno Rajahalme if (have_mask) { 39609aa98adSJarno Rajahalme u32 *dst = (u32 *)cl->bits; 39709aa98adSJarno Rajahalme int i; 39809aa98adSJarno Rajahalme 3996ffcea79SJarno Rajahalme for (i = 0; i < OVS_CT_LABELS_LEN_32; i++) 4006ffcea79SJarno Rajahalme dst[i] = (dst[i] & ~mask->ct_labels_32[i]) | 40109aa98adSJarno Rajahalme (labels->ct_labels_32[i] 40209aa98adSJarno Rajahalme & mask->ct_labels_32[i]); 40309aa98adSJarno Rajahalme } 4046ffcea79SJarno Rajahalme 4052317c6b5SJarno Rajahalme /* Labels are included in the IPCTNL_MSG_CT_NEW event only if the 406abd0a4f2SJarno Rajahalme * IPCT_LABEL bit is set in the event cache. 4072317c6b5SJarno Rajahalme */ 4082317c6b5SJarno Rajahalme nf_conntrack_event_cache(IPCT_LABEL, ct); 4092317c6b5SJarno Rajahalme 4106ffcea79SJarno Rajahalme memcpy(&key->ct.labels, cl->bits, OVS_CT_LABELS_LEN); 4116ffcea79SJarno Rajahalme 4126ffcea79SJarno Rajahalme return 0; 4136ffcea79SJarno Rajahalme } 4146ffcea79SJarno Rajahalme 4156ffcea79SJarno Rajahalme static int ovs_ct_set_labels(struct nf_conn *ct, struct sw_flow_key *key, 4166ffcea79SJarno Rajahalme const struct ovs_key_ct_labels *labels, 4176ffcea79SJarno Rajahalme const struct ovs_key_ct_labels *mask) 4186ffcea79SJarno Rajahalme { 4196ffcea79SJarno Rajahalme struct nf_conn_labels *cl; 4206ffcea79SJarno Rajahalme int err; 4216ffcea79SJarno Rajahalme 4226ffcea79SJarno Rajahalme cl = ovs_ct_get_conn_labels(ct); 4236ffcea79SJarno Rajahalme if (!cl) 4246ffcea79SJarno Rajahalme return -ENOSPC; 4256ffcea79SJarno Rajahalme 4266ffcea79SJarno Rajahalme err = nf_connlabels_replace(ct, labels->ct_labels_32, 427cb80d58fSJarno Rajahalme mask->ct_labels_32, 428cb80d58fSJarno Rajahalme OVS_CT_LABELS_LEN_32); 429c2ac6673SJoe Stringer if (err) 430c2ac6673SJoe Stringer return err; 431193e3096SJarno Rajahalme 4326ffcea79SJarno Rajahalme memcpy(&key->ct.labels, cl->bits, OVS_CT_LABELS_LEN); 433c2ac6673SJoe Stringer 434c2ac6673SJoe Stringer return 0; 435c2ac6673SJoe Stringer } 436c2ac6673SJoe Stringer 437cae3a262SJoe Stringer /* 'skb' should already be pulled to nh_ofs. */ 438cae3a262SJoe Stringer static int ovs_ct_helper(struct sk_buff *skb, u16 proto) 439cae3a262SJoe Stringer { 440cae3a262SJoe Stringer const struct nf_conntrack_helper *helper; 441cae3a262SJoe Stringer const struct nf_conn_help *help; 442cae3a262SJoe Stringer enum ip_conntrack_info ctinfo; 443cae3a262SJoe Stringer unsigned int protoff; 444cae3a262SJoe Stringer struct nf_conn *ct; 44505752523SJarno Rajahalme int err; 446cae3a262SJoe Stringer 447cae3a262SJoe Stringer ct = nf_ct_get(skb, &ctinfo); 448cae3a262SJoe Stringer if (!ct || ctinfo == IP_CT_RELATED_REPLY) 449cae3a262SJoe Stringer return NF_ACCEPT; 450cae3a262SJoe Stringer 451cae3a262SJoe Stringer help = nfct_help(ct); 452cae3a262SJoe Stringer if (!help) 453cae3a262SJoe Stringer return NF_ACCEPT; 454cae3a262SJoe Stringer 455cae3a262SJoe Stringer helper = rcu_dereference(help->helper); 456cae3a262SJoe Stringer if (!helper) 457cae3a262SJoe Stringer return NF_ACCEPT; 458cae3a262SJoe Stringer 459cae3a262SJoe Stringer switch (proto) { 460cae3a262SJoe Stringer case NFPROTO_IPV4: 461cae3a262SJoe Stringer protoff = ip_hdrlen(skb); 462cae3a262SJoe Stringer break; 463cae3a262SJoe Stringer case NFPROTO_IPV6: { 464cae3a262SJoe Stringer u8 nexthdr = ipv6_hdr(skb)->nexthdr; 465cae3a262SJoe Stringer __be16 frag_off; 466cc570605SJoe Stringer int ofs; 467cae3a262SJoe Stringer 468cc570605SJoe Stringer ofs = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, 469cc570605SJoe Stringer &frag_off); 470cc570605SJoe Stringer if (ofs < 0 || (frag_off & htons(~0x7)) != 0) { 471cae3a262SJoe Stringer pr_debug("proto header not found\n"); 472cae3a262SJoe Stringer return NF_ACCEPT; 473cae3a262SJoe Stringer } 474cc570605SJoe Stringer protoff = ofs; 475cae3a262SJoe Stringer break; 476cae3a262SJoe Stringer } 477cae3a262SJoe Stringer default: 478cae3a262SJoe Stringer WARN_ONCE(1, "helper invoked on non-IP family!"); 479cae3a262SJoe Stringer return NF_DROP; 480cae3a262SJoe Stringer } 481cae3a262SJoe Stringer 48205752523SJarno Rajahalme err = helper->help(skb, protoff, ct, ctinfo); 48305752523SJarno Rajahalme if (err != NF_ACCEPT) 48405752523SJarno Rajahalme return err; 48505752523SJarno Rajahalme 48605752523SJarno Rajahalme /* Adjust seqs after helper. This is needed due to some helpers (e.g., 48705752523SJarno Rajahalme * FTP with NAT) adusting the TCP payload size when mangling IP 48805752523SJarno Rajahalme * addresses and/or port numbers in the text-based control connection. 48905752523SJarno Rajahalme */ 49005752523SJarno Rajahalme if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && 49105752523SJarno Rajahalme !nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) 49205752523SJarno Rajahalme return NF_DROP; 49305752523SJarno Rajahalme return NF_ACCEPT; 494cae3a262SJoe Stringer } 495cae3a262SJoe Stringer 49674c16618SJoe Stringer /* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero 49774c16618SJoe Stringer * value if 'skb' is freed. 49874c16618SJoe Stringer */ 4997f8a436eSJoe Stringer static int handle_fragments(struct net *net, struct sw_flow_key *key, 5007f8a436eSJoe Stringer u16 zone, struct sk_buff *skb) 5017f8a436eSJoe Stringer { 5027f8a436eSJoe Stringer struct ovs_skb_cb ovs_cb = *OVS_CB(skb); 503daaa7d64SFlorian Westphal int err; 5047f8a436eSJoe Stringer 5057f8a436eSJoe Stringer if (key->eth.type == htons(ETH_P_IP)) { 5067f8a436eSJoe Stringer enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone; 5077f8a436eSJoe Stringer 5087f8a436eSJoe Stringer memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); 50919bcf9f2SEric W. Biederman err = ip_defrag(net, skb, user); 5107f8a436eSJoe Stringer if (err) 5117f8a436eSJoe Stringer return err; 5127f8a436eSJoe Stringer 5137f8a436eSJoe Stringer ovs_cb.mru = IPCB(skb)->frag_max_size; 5147f8a436eSJoe Stringer #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) 51574c16618SJoe Stringer } else if (key->eth.type == htons(ETH_P_IPV6)) { 5167f8a436eSJoe Stringer enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; 5177f8a436eSJoe Stringer 5187f8a436eSJoe Stringer memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); 519daaa7d64SFlorian Westphal err = nf_ct_frag6_gather(net, skb, user); 520f92a80a9SDaniele Di Proietto if (err) { 521f92a80a9SDaniele Di Proietto if (err != -EINPROGRESS) 522f92a80a9SDaniele Di Proietto kfree_skb(skb); 523daaa7d64SFlorian Westphal return err; 524f92a80a9SDaniele Di Proietto } 5257f8a436eSJoe Stringer 526daaa7d64SFlorian Westphal key->ip.proto = ipv6_hdr(skb)->nexthdr; 5277f8a436eSJoe Stringer ovs_cb.mru = IP6CB(skb)->frag_max_size; 5287f8a436eSJoe Stringer #endif 5297f8a436eSJoe Stringer } else { 53074c16618SJoe Stringer kfree_skb(skb); 5317f8a436eSJoe Stringer return -EPFNOSUPPORT; 5327f8a436eSJoe Stringer } 5337f8a436eSJoe Stringer 534ad06a566SGreg Rose /* The key extracted from the fragment that completed this datagram 535ad06a566SGreg Rose * likely didn't have an L4 header, so regenerate it. 536ad06a566SGreg Rose */ 537ad06a566SGreg Rose ovs_flow_key_update_l3l4(skb, key); 538ad06a566SGreg Rose 5397f8a436eSJoe Stringer key->ip.frag = OVS_FRAG_TYPE_NONE; 5407f8a436eSJoe Stringer skb_clear_hash(skb); 5417f8a436eSJoe Stringer skb->ignore_df = 1; 5427f8a436eSJoe Stringer *OVS_CB(skb) = ovs_cb; 5437f8a436eSJoe Stringer 5447f8a436eSJoe Stringer return 0; 5457f8a436eSJoe Stringer } 5467f8a436eSJoe Stringer 5477f8a436eSJoe Stringer static struct nf_conntrack_expect * 5487f8a436eSJoe Stringer ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone, 5497f8a436eSJoe Stringer u16 proto, const struct sk_buff *skb) 5507f8a436eSJoe Stringer { 5517f8a436eSJoe Stringer struct nf_conntrack_tuple tuple; 552cf5d7091SJarno Rajahalme struct nf_conntrack_expect *exp; 5537f8a436eSJoe Stringer 554a31f1adcSEric W. Biederman if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, net, &tuple)) 5557f8a436eSJoe Stringer return NULL; 556cf5d7091SJarno Rajahalme 557cf5d7091SJarno Rajahalme exp = __nf_ct_expect_find(net, zone, &tuple); 558cf5d7091SJarno Rajahalme if (exp) { 559cf5d7091SJarno Rajahalme struct nf_conntrack_tuple_hash *h; 560cf5d7091SJarno Rajahalme 561cf5d7091SJarno Rajahalme /* Delete existing conntrack entry, if it clashes with the 562cf5d7091SJarno Rajahalme * expectation. This can happen since conntrack ALGs do not 563cf5d7091SJarno Rajahalme * check for clashes between (new) expectations and existing 564cf5d7091SJarno Rajahalme * conntrack entries. nf_conntrack_in() will check the 565cf5d7091SJarno Rajahalme * expectations only if a conntrack entry can not be found, 566cf5d7091SJarno Rajahalme * which can lead to OVS finding the expectation (here) in the 567cf5d7091SJarno Rajahalme * init direction, but which will not be removed by the 568cf5d7091SJarno Rajahalme * nf_conntrack_in() call, if a matching conntrack entry is 569cf5d7091SJarno Rajahalme * found instead. In this case all init direction packets 570cf5d7091SJarno Rajahalme * would be reported as new related packets, while reply 571cf5d7091SJarno Rajahalme * direction packets would be reported as un-related 572cf5d7091SJarno Rajahalme * established packets. 573cf5d7091SJarno Rajahalme */ 574cf5d7091SJarno Rajahalme h = nf_conntrack_find_get(net, zone, &tuple); 575cf5d7091SJarno Rajahalme if (h) { 576cf5d7091SJarno Rajahalme struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); 577cf5d7091SJarno Rajahalme 578cf5d7091SJarno Rajahalme nf_ct_delete(ct, 0, 0); 579408bdcfcSFlorian Westphal nf_ct_put(ct); 580cf5d7091SJarno Rajahalme } 581cf5d7091SJarno Rajahalme } 582cf5d7091SJarno Rajahalme 583cf5d7091SJarno Rajahalme return exp; 5847f8a436eSJoe Stringer } 5857f8a436eSJoe Stringer 586289f2253SJarno Rajahalme /* This replicates logic from nf_conntrack_core.c that is not exported. */ 587289f2253SJarno Rajahalme static enum ip_conntrack_info 588289f2253SJarno Rajahalme ovs_ct_get_info(const struct nf_conntrack_tuple_hash *h) 589289f2253SJarno Rajahalme { 590289f2253SJarno Rajahalme const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); 591289f2253SJarno Rajahalme 592289f2253SJarno Rajahalme if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) 593289f2253SJarno Rajahalme return IP_CT_ESTABLISHED_REPLY; 594289f2253SJarno Rajahalme /* Once we've had two way comms, always ESTABLISHED. */ 595289f2253SJarno Rajahalme if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) 596289f2253SJarno Rajahalme return IP_CT_ESTABLISHED; 597289f2253SJarno Rajahalme if (test_bit(IPS_EXPECTED_BIT, &ct->status)) 598289f2253SJarno Rajahalme return IP_CT_RELATED; 599289f2253SJarno Rajahalme return IP_CT_NEW; 600289f2253SJarno Rajahalme } 601289f2253SJarno Rajahalme 602289f2253SJarno Rajahalme /* Find an existing connection which this packet belongs to without 603289f2253SJarno Rajahalme * re-attributing statistics or modifying the connection state. This allows an 6045e17da63SJarno Rajahalme * skb->_nfct lost due to an upcall to be recovered during actions execution. 605289f2253SJarno Rajahalme * 606289f2253SJarno Rajahalme * Must be called with rcu_read_lock. 607289f2253SJarno Rajahalme * 6085e17da63SJarno Rajahalme * On success, populates skb->_nfct and returns the connection. Returns NULL 6095e17da63SJarno Rajahalme * if there is no existing entry. 610289f2253SJarno Rajahalme */ 611289f2253SJarno Rajahalme static struct nf_conn * 612289f2253SJarno Rajahalme ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone, 6139ff464dbSJarno Rajahalme u8 l3num, struct sk_buff *skb, bool natted) 614289f2253SJarno Rajahalme { 615289f2253SJarno Rajahalme struct nf_conntrack_tuple tuple; 616289f2253SJarno Rajahalme struct nf_conntrack_tuple_hash *h; 617289f2253SJarno Rajahalme struct nf_conn *ct; 618289f2253SJarno Rajahalme 61960e3be94SFlorian Westphal if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), l3num, 62060e3be94SFlorian Westphal net, &tuple)) { 621289f2253SJarno Rajahalme pr_debug("ovs_ct_find_existing: Can't get tuple\n"); 622289f2253SJarno Rajahalme return NULL; 623289f2253SJarno Rajahalme } 624289f2253SJarno Rajahalme 6259ff464dbSJarno Rajahalme /* Must invert the tuple if skb has been transformed by NAT. */ 6269ff464dbSJarno Rajahalme if (natted) { 6279ff464dbSJarno Rajahalme struct nf_conntrack_tuple inverse; 6289ff464dbSJarno Rajahalme 629303e0c55SFlorian Westphal if (!nf_ct_invert_tuple(&inverse, &tuple)) { 6309ff464dbSJarno Rajahalme pr_debug("ovs_ct_find_existing: Inversion failed!\n"); 6319ff464dbSJarno Rajahalme return NULL; 6329ff464dbSJarno Rajahalme } 6339ff464dbSJarno Rajahalme tuple = inverse; 6349ff464dbSJarno Rajahalme } 6359ff464dbSJarno Rajahalme 636289f2253SJarno Rajahalme /* look for tuple match */ 637289f2253SJarno Rajahalme h = nf_conntrack_find_get(net, zone, &tuple); 638289f2253SJarno Rajahalme if (!h) 639289f2253SJarno Rajahalme return NULL; /* Not found. */ 640289f2253SJarno Rajahalme 641289f2253SJarno Rajahalme ct = nf_ct_tuplehash_to_ctrack(h); 642289f2253SJarno Rajahalme 6439ff464dbSJarno Rajahalme /* Inverted packet tuple matches the reverse direction conntrack tuple, 6449ff464dbSJarno Rajahalme * select the other tuplehash to get the right 'ctinfo' bits for this 6459ff464dbSJarno Rajahalme * packet. 6469ff464dbSJarno Rajahalme */ 6479ff464dbSJarno Rajahalme if (natted) 6489ff464dbSJarno Rajahalme h = &ct->tuplehash[!h->tuple.dst.dir]; 6499ff464dbSJarno Rajahalme 650c74454faSFlorian Westphal nf_ct_set(skb, ct, ovs_ct_get_info(h)); 651289f2253SJarno Rajahalme return ct; 652289f2253SJarno Rajahalme } 653289f2253SJarno Rajahalme 6548b97ac5bSGreg Rose static 6558b97ac5bSGreg Rose struct nf_conn *ovs_ct_executed(struct net *net, 6568b97ac5bSGreg Rose const struct sw_flow_key *key, 6578b97ac5bSGreg Rose const struct ovs_conntrack_info *info, 6588b97ac5bSGreg Rose struct sk_buff *skb, 6598b97ac5bSGreg Rose bool *ct_executed) 6608b97ac5bSGreg Rose { 6618b97ac5bSGreg Rose struct nf_conn *ct = NULL; 6628b97ac5bSGreg Rose 6638b97ac5bSGreg Rose /* If no ct, check if we have evidence that an existing conntrack entry 6648b97ac5bSGreg Rose * might be found for this skb. This happens when we lose a skb->_nfct 6658b97ac5bSGreg Rose * due to an upcall, or if the direction is being forced. If the 6668b97ac5bSGreg Rose * connection was not confirmed, it is not cached and needs to be run 6678b97ac5bSGreg Rose * through conntrack again. 6688b97ac5bSGreg Rose */ 6698b97ac5bSGreg Rose *ct_executed = (key->ct_state & OVS_CS_F_TRACKED) && 6708b97ac5bSGreg Rose !(key->ct_state & OVS_CS_F_INVALID) && 6718b97ac5bSGreg Rose (key->ct_zone == info->zone.id); 6728b97ac5bSGreg Rose 6738b97ac5bSGreg Rose if (*ct_executed || (!key->ct_state && info->force)) { 6748b97ac5bSGreg Rose ct = ovs_ct_find_existing(net, &info->zone, info->family, skb, 6758b97ac5bSGreg Rose !!(key->ct_state & 6768b97ac5bSGreg Rose OVS_CS_F_NAT_MASK)); 6778b97ac5bSGreg Rose } 6788b97ac5bSGreg Rose 6798b97ac5bSGreg Rose return ct; 6808b97ac5bSGreg Rose } 6818b97ac5bSGreg Rose 6825e17da63SJarno Rajahalme /* Determine whether skb->_nfct is equal to the result of conntrack lookup. */ 683289f2253SJarno Rajahalme static bool skb_nfct_cached(struct net *net, 684289f2253SJarno Rajahalme const struct sw_flow_key *key, 685289f2253SJarno Rajahalme const struct ovs_conntrack_info *info, 686289f2253SJarno Rajahalme struct sk_buff *skb) 6877f8a436eSJoe Stringer { 6887f8a436eSJoe Stringer enum ip_conntrack_info ctinfo; 6897f8a436eSJoe Stringer struct nf_conn *ct; 6908b97ac5bSGreg Rose bool ct_executed = true; 6917f8a436eSJoe Stringer 6927f8a436eSJoe Stringer ct = nf_ct_get(skb, &ctinfo); 6938b97ac5bSGreg Rose if (!ct) 6948b97ac5bSGreg Rose ct = ovs_ct_executed(net, key, info, skb, &ct_executed); 6958b97ac5bSGreg Rose 696dd41d33fSJarno Rajahalme if (ct) 697dd41d33fSJarno Rajahalme nf_ct_get(skb, &ctinfo); 6988b97ac5bSGreg Rose else 6997f8a436eSJoe Stringer return false; 7008b97ac5bSGreg Rose 7017f8a436eSJoe Stringer if (!net_eq(net, read_pnet(&ct->ct_net))) 7027f8a436eSJoe Stringer return false; 7037f8a436eSJoe Stringer if (!nf_ct_zone_equal_any(info->ct, nf_ct_zone(ct))) 7047f8a436eSJoe Stringer return false; 705cae3a262SJoe Stringer if (info->helper) { 706cae3a262SJoe Stringer struct nf_conn_help *help; 707cae3a262SJoe Stringer 708cae3a262SJoe Stringer help = nf_ct_ext_find(ct, NF_CT_EXT_HELPER); 709cae3a262SJoe Stringer if (help && rcu_access_pointer(help->helper) != info->helper) 710cae3a262SJoe Stringer return false; 711cae3a262SJoe Stringer } 71271778951SYi-Hung Wei if (info->nf_ct_timeout) { 71371778951SYi-Hung Wei struct nf_conn_timeout *timeout_ext; 71471778951SYi-Hung Wei 71571778951SYi-Hung Wei timeout_ext = nf_ct_timeout_find(ct); 71671778951SYi-Hung Wei if (!timeout_ext || info->nf_ct_timeout != 71771778951SYi-Hung Wei rcu_dereference(timeout_ext->timeout)) 71871778951SYi-Hung Wei return false; 71971778951SYi-Hung Wei } 720dd41d33fSJarno Rajahalme /* Force conntrack entry direction to the current packet? */ 721dd41d33fSJarno Rajahalme if (info->force && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) { 722dd41d33fSJarno Rajahalme /* Delete the conntrack entry if confirmed, else just release 723dd41d33fSJarno Rajahalme * the reference. 724dd41d33fSJarno Rajahalme */ 725dd41d33fSJarno Rajahalme if (nf_ct_is_confirmed(ct)) 726dd41d33fSJarno Rajahalme nf_ct_delete(ct, 0, 0); 727b768b16dSJarno Rajahalme 728408bdcfcSFlorian Westphal nf_ct_put(ct); 729dd41d33fSJarno Rajahalme nf_ct_set(skb, NULL, 0); 730dd41d33fSJarno Rajahalme return false; 731dd41d33fSJarno Rajahalme } 7327f8a436eSJoe Stringer 7338b97ac5bSGreg Rose return ct_executed; 7347f8a436eSJoe Stringer } 7357f8a436eSJoe Stringer 7364806e975SFlorian Westphal #if IS_ENABLED(CONFIG_NF_NAT) 73760b44ca6SAaron Conole static void ovs_nat_update_key(struct sw_flow_key *key, 73860b44ca6SAaron Conole const struct sk_buff *skb, 73960b44ca6SAaron Conole enum nf_nat_manip_type maniptype) 74060b44ca6SAaron Conole { 74160b44ca6SAaron Conole if (maniptype == NF_NAT_MANIP_SRC) { 74260b44ca6SAaron Conole __be16 src; 74360b44ca6SAaron Conole 74460b44ca6SAaron Conole key->ct_state |= OVS_CS_F_SRC_NAT; 74560b44ca6SAaron Conole if (key->eth.type == htons(ETH_P_IP)) 74660b44ca6SAaron Conole key->ipv4.addr.src = ip_hdr(skb)->saddr; 74760b44ca6SAaron Conole else if (key->eth.type == htons(ETH_P_IPV6)) 74860b44ca6SAaron Conole memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr, 74960b44ca6SAaron Conole sizeof(key->ipv6.addr.src)); 75060b44ca6SAaron Conole else 75160b44ca6SAaron Conole return; 75260b44ca6SAaron Conole 75360b44ca6SAaron Conole if (key->ip.proto == IPPROTO_UDP) 75460b44ca6SAaron Conole src = udp_hdr(skb)->source; 75560b44ca6SAaron Conole else if (key->ip.proto == IPPROTO_TCP) 75660b44ca6SAaron Conole src = tcp_hdr(skb)->source; 75760b44ca6SAaron Conole else if (key->ip.proto == IPPROTO_SCTP) 75860b44ca6SAaron Conole src = sctp_hdr(skb)->source; 75960b44ca6SAaron Conole else 76060b44ca6SAaron Conole return; 76160b44ca6SAaron Conole 76260b44ca6SAaron Conole key->tp.src = src; 76360b44ca6SAaron Conole } else { 76460b44ca6SAaron Conole __be16 dst; 76560b44ca6SAaron Conole 76660b44ca6SAaron Conole key->ct_state |= OVS_CS_F_DST_NAT; 76760b44ca6SAaron Conole if (key->eth.type == htons(ETH_P_IP)) 76860b44ca6SAaron Conole key->ipv4.addr.dst = ip_hdr(skb)->daddr; 76960b44ca6SAaron Conole else if (key->eth.type == htons(ETH_P_IPV6)) 77060b44ca6SAaron Conole memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr, 77160b44ca6SAaron Conole sizeof(key->ipv6.addr.dst)); 77260b44ca6SAaron Conole else 77360b44ca6SAaron Conole return; 77460b44ca6SAaron Conole 77560b44ca6SAaron Conole if (key->ip.proto == IPPROTO_UDP) 77660b44ca6SAaron Conole dst = udp_hdr(skb)->dest; 77760b44ca6SAaron Conole else if (key->ip.proto == IPPROTO_TCP) 77860b44ca6SAaron Conole dst = tcp_hdr(skb)->dest; 77960b44ca6SAaron Conole else if (key->ip.proto == IPPROTO_SCTP) 78060b44ca6SAaron Conole dst = sctp_hdr(skb)->dest; 78160b44ca6SAaron Conole else 78260b44ca6SAaron Conole return; 78360b44ca6SAaron Conole 78460b44ca6SAaron Conole key->tp.dst = dst; 78560b44ca6SAaron Conole } 78660b44ca6SAaron Conole } 78760b44ca6SAaron Conole 78805752523SJarno Rajahalme /* Modelled after nf_nat_ipv[46]_fn(). 78905752523SJarno Rajahalme * range is only used for new, uninitialized NAT state. 79005752523SJarno Rajahalme * Returns either NF_ACCEPT or NF_DROP. 79105752523SJarno Rajahalme */ 79205752523SJarno Rajahalme static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, 79305752523SJarno Rajahalme enum ip_conntrack_info ctinfo, 7942eb0f624SThierry Du Tre const struct nf_nat_range2 *range, 79560b44ca6SAaron Conole enum nf_nat_manip_type maniptype, struct sw_flow_key *key) 79605752523SJarno Rajahalme { 79705752523SJarno Rajahalme int hooknum, nh_off, err = NF_ACCEPT; 79805752523SJarno Rajahalme 79905752523SJarno Rajahalme nh_off = skb_network_offset(skb); 80075f01a4cSLance Richardson skb_pull_rcsum(skb, nh_off); 80105752523SJarno Rajahalme 80205752523SJarno Rajahalme /* See HOOK2MANIP(). */ 80305752523SJarno Rajahalme if (maniptype == NF_NAT_MANIP_SRC) 80405752523SJarno Rajahalme hooknum = NF_INET_LOCAL_IN; /* Source NAT */ 80505752523SJarno Rajahalme else 80605752523SJarno Rajahalme hooknum = NF_INET_LOCAL_OUT; /* Destination NAT */ 80705752523SJarno Rajahalme 80805752523SJarno Rajahalme switch (ctinfo) { 80905752523SJarno Rajahalme case IP_CT_RELATED: 81005752523SJarno Rajahalme case IP_CT_RELATED_REPLY: 8113bf195aeSFlorian Westphal if (IS_ENABLED(CONFIG_NF_NAT) && 81299b7248eSArnd Bergmann skb->protocol == htons(ETH_P_IP) && 81305752523SJarno Rajahalme ip_hdr(skb)->protocol == IPPROTO_ICMP) { 81405752523SJarno Rajahalme if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, 81505752523SJarno Rajahalme hooknum)) 81605752523SJarno Rajahalme err = NF_DROP; 81705752523SJarno Rajahalme goto push; 8183bf195aeSFlorian Westphal } else if (IS_ENABLED(CONFIG_IPV6) && 81999b7248eSArnd Bergmann skb->protocol == htons(ETH_P_IPV6)) { 82005752523SJarno Rajahalme __be16 frag_off; 82105752523SJarno Rajahalme u8 nexthdr = ipv6_hdr(skb)->nexthdr; 82205752523SJarno Rajahalme int hdrlen = ipv6_skip_exthdr(skb, 82305752523SJarno Rajahalme sizeof(struct ipv6hdr), 82405752523SJarno Rajahalme &nexthdr, &frag_off); 82505752523SJarno Rajahalme 82605752523SJarno Rajahalme if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { 82705752523SJarno Rajahalme if (!nf_nat_icmpv6_reply_translation(skb, ct, 82805752523SJarno Rajahalme ctinfo, 82905752523SJarno Rajahalme hooknum, 83005752523SJarno Rajahalme hdrlen)) 83105752523SJarno Rajahalme err = NF_DROP; 83205752523SJarno Rajahalme goto push; 83305752523SJarno Rajahalme } 83405752523SJarno Rajahalme } 83505752523SJarno Rajahalme /* Non-ICMP, fall thru to initialize if needed. */ 836df561f66SGustavo A. R. Silva fallthrough; 83705752523SJarno Rajahalme case IP_CT_NEW: 83805752523SJarno Rajahalme /* Seen it before? This can happen for loopback, retrans, 83905752523SJarno Rajahalme * or local packets. 84005752523SJarno Rajahalme */ 84105752523SJarno Rajahalme if (!nf_nat_initialized(ct, maniptype)) { 84205752523SJarno Rajahalme /* Initialize according to the NAT action. */ 84305752523SJarno Rajahalme err = (range && range->flags & NF_NAT_RANGE_MAP_IPS) 84405752523SJarno Rajahalme /* Action is set up to establish a new 84505752523SJarno Rajahalme * mapping. 84605752523SJarno Rajahalme */ 84705752523SJarno Rajahalme ? nf_nat_setup_info(ct, range, maniptype) 84805752523SJarno Rajahalme : nf_nat_alloc_null_binding(ct, hooknum); 84905752523SJarno Rajahalme if (err != NF_ACCEPT) 85005752523SJarno Rajahalme goto push; 85105752523SJarno Rajahalme } 85205752523SJarno Rajahalme break; 85305752523SJarno Rajahalme 85405752523SJarno Rajahalme case IP_CT_ESTABLISHED: 85505752523SJarno Rajahalme case IP_CT_ESTABLISHED_REPLY: 85605752523SJarno Rajahalme break; 85705752523SJarno Rajahalme 85805752523SJarno Rajahalme default: 85905752523SJarno Rajahalme err = NF_DROP; 86005752523SJarno Rajahalme goto push; 86105752523SJarno Rajahalme } 86205752523SJarno Rajahalme 86305752523SJarno Rajahalme err = nf_nat_packet(ct, ctinfo, hooknum, skb); 86405752523SJarno Rajahalme push: 8657d42e84eSChristophe JAILLET skb_push_rcsum(skb, nh_off); 86605752523SJarno Rajahalme 86760b44ca6SAaron Conole /* Update the flow key if NAT successful. */ 86860b44ca6SAaron Conole if (err == NF_ACCEPT) 86960b44ca6SAaron Conole ovs_nat_update_key(key, skb, maniptype); 87060b44ca6SAaron Conole 87105752523SJarno Rajahalme return err; 87205752523SJarno Rajahalme } 87305752523SJarno Rajahalme 87405752523SJarno Rajahalme /* Returns NF_DROP if the packet should be dropped, NF_ACCEPT otherwise. */ 87505752523SJarno Rajahalme static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, 87605752523SJarno Rajahalme const struct ovs_conntrack_info *info, 87705752523SJarno Rajahalme struct sk_buff *skb, struct nf_conn *ct, 87805752523SJarno Rajahalme enum ip_conntrack_info ctinfo) 87905752523SJarno Rajahalme { 88005752523SJarno Rajahalme enum nf_nat_manip_type maniptype; 88105752523SJarno Rajahalme int err; 88205752523SJarno Rajahalme 88305752523SJarno Rajahalme /* Add NAT extension if not confirmed yet. */ 88405752523SJarno Rajahalme if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct)) 88505752523SJarno Rajahalme return NF_ACCEPT; /* Can't NAT. */ 88605752523SJarno Rajahalme 88705752523SJarno Rajahalme /* Determine NAT type. 88805752523SJarno Rajahalme * Check if the NAT type can be deduced from the tracked connection. 8895745b0beSJarno Rajahalme * Make sure new expected connections (IP_CT_RELATED) are NATted only 8905745b0beSJarno Rajahalme * when committing. 89105752523SJarno Rajahalme */ 89205752523SJarno Rajahalme if (info->nat & OVS_CT_NAT && ctinfo != IP_CT_NEW && 89305752523SJarno Rajahalme ct->status & IPS_NAT_MASK && 8945745b0beSJarno Rajahalme (ctinfo != IP_CT_RELATED || info->commit)) { 89505752523SJarno Rajahalme /* NAT an established or related connection like before. */ 89605752523SJarno Rajahalme if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) 89705752523SJarno Rajahalme /* This is the REPLY direction for a connection 89805752523SJarno Rajahalme * for which NAT was applied in the forward 89905752523SJarno Rajahalme * direction. Do the reverse NAT. 90005752523SJarno Rajahalme */ 90105752523SJarno Rajahalme maniptype = ct->status & IPS_SRC_NAT 90205752523SJarno Rajahalme ? NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC; 90305752523SJarno Rajahalme else 90405752523SJarno Rajahalme maniptype = ct->status & IPS_SRC_NAT 90505752523SJarno Rajahalme ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST; 90605752523SJarno Rajahalme } else if (info->nat & OVS_CT_SRC_NAT) { 90705752523SJarno Rajahalme maniptype = NF_NAT_MANIP_SRC; 90805752523SJarno Rajahalme } else if (info->nat & OVS_CT_DST_NAT) { 90905752523SJarno Rajahalme maniptype = NF_NAT_MANIP_DST; 91005752523SJarno Rajahalme } else { 91105752523SJarno Rajahalme return NF_ACCEPT; /* Connection is not NATed. */ 91205752523SJarno Rajahalme } 91360b44ca6SAaron Conole err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype, key); 91405752523SJarno Rajahalme 9158aa7b526SDumitru Ceara if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) { 9168aa7b526SDumitru Ceara if (ct->status & IPS_SRC_NAT) { 9175d50aa83SAaron Conole if (maniptype == NF_NAT_MANIP_SRC) 9185d50aa83SAaron Conole maniptype = NF_NAT_MANIP_DST; 9195d50aa83SAaron Conole else 9205d50aa83SAaron Conole maniptype = NF_NAT_MANIP_SRC; 9215d50aa83SAaron Conole 9225d50aa83SAaron Conole err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, 92360b44ca6SAaron Conole maniptype, key); 9248aa7b526SDumitru Ceara } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { 9258aa7b526SDumitru Ceara err = ovs_ct_nat_execute(skb, ct, ctinfo, NULL, 92660b44ca6SAaron Conole NF_NAT_MANIP_SRC, key); 9278aa7b526SDumitru Ceara } 9285d50aa83SAaron Conole } 9295d50aa83SAaron Conole 93005752523SJarno Rajahalme return err; 93105752523SJarno Rajahalme } 9324806e975SFlorian Westphal #else /* !CONFIG_NF_NAT */ 93305752523SJarno Rajahalme static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, 93405752523SJarno Rajahalme const struct ovs_conntrack_info *info, 93505752523SJarno Rajahalme struct sk_buff *skb, struct nf_conn *ct, 93605752523SJarno Rajahalme enum ip_conntrack_info ctinfo) 93705752523SJarno Rajahalme { 93805752523SJarno Rajahalme return NF_ACCEPT; 93905752523SJarno Rajahalme } 94005752523SJarno Rajahalme #endif 94105752523SJarno Rajahalme 9429f13ded8SJarno Rajahalme /* Pass 'skb' through conntrack in 'net', using zone configured in 'info', if 943394e910eSJarno Rajahalme * not done already. Update key with new CT state after passing the packet 944394e910eSJarno Rajahalme * through conntrack. 9455e17da63SJarno Rajahalme * Note that if the packet is deemed invalid by conntrack, skb->_nfct will be 9469f13ded8SJarno Rajahalme * set to NULL and 0 will be returned. 9479f13ded8SJarno Rajahalme */ 9484f0909eeSJoe Stringer static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, 9497f8a436eSJoe Stringer const struct ovs_conntrack_info *info, 9507f8a436eSJoe Stringer struct sk_buff *skb) 9517f8a436eSJoe Stringer { 9527f8a436eSJoe Stringer /* If we are recirculating packets to match on conntrack fields and 9537f8a436eSJoe Stringer * committing with a separate conntrack action, then we don't need to 9547f8a436eSJoe Stringer * actually run the packet through conntrack twice unless it's for a 9557f8a436eSJoe Stringer * different zone. 9567f8a436eSJoe Stringer */ 95728b6e0c1SJarno Rajahalme bool cached = skb_nfct_cached(net, key, info, skb); 95828b6e0c1SJarno Rajahalme enum ip_conntrack_info ctinfo; 95928b6e0c1SJarno Rajahalme struct nf_conn *ct; 96028b6e0c1SJarno Rajahalme 96128b6e0c1SJarno Rajahalme if (!cached) { 96293e66024SFlorian Westphal struct nf_hook_state state = { 96393e66024SFlorian Westphal .hook = NF_INET_PRE_ROUTING, 96493e66024SFlorian Westphal .pf = info->family, 96593e66024SFlorian Westphal .net = net, 96693e66024SFlorian Westphal }; 9677f8a436eSJoe Stringer struct nf_conn *tmpl = info->ct; 9685b6b9293SJarno Rajahalme int err; 9697f8a436eSJoe Stringer 9707f8a436eSJoe Stringer /* Associate skb with specified zone. */ 9717f8a436eSJoe Stringer if (tmpl) { 972408bdcfcSFlorian Westphal ct = nf_ct_get(skb, &ctinfo); 973408bdcfcSFlorian Westphal nf_ct_put(ct); 9747f8a436eSJoe Stringer nf_conntrack_get(&tmpl->ct_general); 975c74454faSFlorian Westphal nf_ct_set(skb, tmpl, IP_CT_NEW); 9767f8a436eSJoe Stringer } 9777f8a436eSJoe Stringer 97893e66024SFlorian Westphal err = nf_conntrack_in(skb, &state); 9795b6b9293SJarno Rajahalme if (err != NF_ACCEPT) 9807f8a436eSJoe Stringer return -ENOENT; 981cae3a262SJoe Stringer 98205752523SJarno Rajahalme /* Clear CT state NAT flags to mark that we have not yet done 98305752523SJarno Rajahalme * NAT after the nf_conntrack_in() call. We can actually clear 98405752523SJarno Rajahalme * the whole state, as it will be re-initialized below. 98505752523SJarno Rajahalme */ 986316d4d78SJarno Rajahalme key->ct_state = 0; 98705752523SJarno Rajahalme 98805752523SJarno Rajahalme /* Update the key, but keep the NAT flags. */ 98905752523SJarno Rajahalme ovs_ct_update_key(skb, info, key, true, true); 99005752523SJarno Rajahalme } 99105752523SJarno Rajahalme 99205752523SJarno Rajahalme ct = nf_ct_get(skb, &ctinfo); 99305752523SJarno Rajahalme if (ct) { 994248d45f1SYi-Hung Wei bool add_helper = false; 995248d45f1SYi-Hung Wei 99605752523SJarno Rajahalme /* Packets starting a new connection must be NATted before the 99705752523SJarno Rajahalme * helper, so that the helper knows about the NAT. We enforce 99805752523SJarno Rajahalme * this by delaying both NAT and helper calls for unconfirmed 99905752523SJarno Rajahalme * connections until the committing CT action. For later 100005752523SJarno Rajahalme * packets NAT and Helper may be called in either order. 100105752523SJarno Rajahalme * 100205752523SJarno Rajahalme * NAT will be done only if the CT action has NAT, and only 100305752523SJarno Rajahalme * once per packet (per zone), as guarded by the NAT bits in 1004316d4d78SJarno Rajahalme * the key->ct_state. 100505752523SJarno Rajahalme */ 1006316d4d78SJarno Rajahalme if (info->nat && !(key->ct_state & OVS_CS_F_NAT_MASK) && 100705752523SJarno Rajahalme (nf_ct_is_confirmed(ct) || info->commit) && 100805752523SJarno Rajahalme ovs_ct_nat(net, key, info, skb, ct, ctinfo) != NF_ACCEPT) { 100905752523SJarno Rajahalme return -EINVAL; 101028b6e0c1SJarno Rajahalme } 1011394e910eSJarno Rajahalme 101216ec3d4fSJoe Stringer /* Userspace may decide to perform a ct lookup without a helper 1013248d45f1SYi-Hung Wei * specified followed by a (recirculate and) commit with one, 1014248d45f1SYi-Hung Wei * or attach a helper in a later commit. Therefore, for 1015248d45f1SYi-Hung Wei * connections which we will commit, we may need to attach 1016248d45f1SYi-Hung Wei * the helper here. 101716ec3d4fSJoe Stringer */ 1018248d45f1SYi-Hung Wei if (info->commit && info->helper && !nfct_help(ct)) { 101916ec3d4fSJoe Stringer int err = __nf_ct_try_assign_helper(ct, info->ct, 102016ec3d4fSJoe Stringer GFP_ATOMIC); 102116ec3d4fSJoe Stringer if (err) 102216ec3d4fSJoe Stringer return err; 1023248d45f1SYi-Hung Wei add_helper = true; 1024fa7e428cSFlavio Leitner 1025fa7e428cSFlavio Leitner /* helper installed, add seqadj if NAT is required */ 1026fa7e428cSFlavio Leitner if (info->nat && !nfct_seqadj(ct)) { 1027fa7e428cSFlavio Leitner if (!nfct_seqadj_ext_add(ct)) 1028fa7e428cSFlavio Leitner return -EINVAL; 1029fa7e428cSFlavio Leitner } 103016ec3d4fSJoe Stringer } 103116ec3d4fSJoe Stringer 103228b6e0c1SJarno Rajahalme /* Call the helper only if: 1033248d45f1SYi-Hung Wei * - nf_conntrack_in() was executed above ("!cached") or a 1034248d45f1SYi-Hung Wei * helper was just attached ("add_helper") for a confirmed 1035248d45f1SYi-Hung Wei * connection, or 103628b6e0c1SJarno Rajahalme * - When committing an unconfirmed connection. 103728b6e0c1SJarno Rajahalme */ 1038248d45f1SYi-Hung Wei if ((nf_ct_is_confirmed(ct) ? !cached || add_helper : 1039248d45f1SYi-Hung Wei info->commit) && 104028b6e0c1SJarno Rajahalme ovs_ct_helper(skb, info->family) != NF_ACCEPT) { 1041cae3a262SJoe Stringer return -EINVAL; 1042cae3a262SJoe Stringer } 1043e2ef5203SNuman Siddique 1044e2ef5203SNuman Siddique if (nf_ct_protonum(ct) == IPPROTO_TCP && 1045e2ef5203SNuman Siddique nf_ct_is_confirmed(ct) && nf_conntrack_tcp_established(ct)) { 1046e2ef5203SNuman Siddique /* Be liberal for tcp packets so that out-of-window 1047e2ef5203SNuman Siddique * packets are not marked invalid. 1048e2ef5203SNuman Siddique */ 1049e2ef5203SNuman Siddique nf_ct_set_tcp_be_liberal(ct); 1050e2ef5203SNuman Siddique } 1051b702436aSPaul Blakey 1052b702436aSPaul Blakey nf_conn_act_ct_ext_fill(skb, ct, ctinfo); 105305752523SJarno Rajahalme } 10547f8a436eSJoe Stringer 10557f8a436eSJoe Stringer return 0; 10567f8a436eSJoe Stringer } 10577f8a436eSJoe Stringer 10587f8a436eSJoe Stringer /* Lookup connection and read fields into key. */ 10597f8a436eSJoe Stringer static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key, 10607f8a436eSJoe Stringer const struct ovs_conntrack_info *info, 10617f8a436eSJoe Stringer struct sk_buff *skb) 10627f8a436eSJoe Stringer { 10637f8a436eSJoe Stringer struct nf_conntrack_expect *exp; 10647f8a436eSJoe Stringer 10659f13ded8SJarno Rajahalme /* If we pass an expected packet through nf_conntrack_in() the 10669f13ded8SJarno Rajahalme * expectation is typically removed, but the packet could still be 10679f13ded8SJarno Rajahalme * lost in upcall processing. To prevent this from happening we 10689f13ded8SJarno Rajahalme * perform an explicit expectation lookup. Expected connections are 10699f13ded8SJarno Rajahalme * always new, and will be passed through conntrack only when they are 10709f13ded8SJarno Rajahalme * committed, as it is OK to remove the expectation at that time. 10719f13ded8SJarno Rajahalme */ 10727f8a436eSJoe Stringer exp = ovs_ct_expect_find(net, &info->zone, info->family, skb); 10737f8a436eSJoe Stringer if (exp) { 10747f8a436eSJoe Stringer u8 state; 10757f8a436eSJoe Stringer 107605752523SJarno Rajahalme /* NOTE: New connections are NATted and Helped only when 107705752523SJarno Rajahalme * committed, so we are not calling into NAT here. 107805752523SJarno Rajahalme */ 10797f8a436eSJoe Stringer state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED; 1080182e3042SJoe Stringer __ovs_ct_update_key(key, state, &info->zone, exp->master); 1081d913d3a7SSamuel Gauthier } else { 1082d913d3a7SSamuel Gauthier struct nf_conn *ct; 1083d913d3a7SSamuel Gauthier int err; 1084d913d3a7SSamuel Gauthier 1085d913d3a7SSamuel Gauthier err = __ovs_ct_lookup(net, key, info, skb); 1086d913d3a7SSamuel Gauthier if (err) 1087d913d3a7SSamuel Gauthier return err; 1088d913d3a7SSamuel Gauthier 1089cb9c6836SFlorian Westphal ct = (struct nf_conn *)skb_nfct(skb); 1090d913d3a7SSamuel Gauthier if (ct) 1091d913d3a7SSamuel Gauthier nf_ct_deliver_cached_events(ct); 1092d913d3a7SSamuel Gauthier } 10937f8a436eSJoe Stringer 10947f8a436eSJoe Stringer return 0; 10957f8a436eSJoe Stringer } 10967f8a436eSJoe Stringer 109733db4125SJoe Stringer static bool labels_nonzero(const struct ovs_key_ct_labels *labels) 1098c2ac6673SJoe Stringer { 1099c2ac6673SJoe Stringer size_t i; 1100c2ac6673SJoe Stringer 1101cb80d58fSJarno Rajahalme for (i = 0; i < OVS_CT_LABELS_LEN_32; i++) 1102cb80d58fSJarno Rajahalme if (labels->ct_labels_32[i]) 1103c2ac6673SJoe Stringer return true; 1104c2ac6673SJoe Stringer 1105c2ac6673SJoe Stringer return false; 1106c2ac6673SJoe Stringer } 1107c2ac6673SJoe Stringer 110811efd5cbSYi-Hung Wei #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) 110911efd5cbSYi-Hung Wei static struct hlist_head *ct_limit_hash_bucket( 111011efd5cbSYi-Hung Wei const struct ovs_ct_limit_info *info, u16 zone) 111111efd5cbSYi-Hung Wei { 111211efd5cbSYi-Hung Wei return &info->limits[zone & (CT_LIMIT_HASH_BUCKETS - 1)]; 111311efd5cbSYi-Hung Wei } 111411efd5cbSYi-Hung Wei 111511efd5cbSYi-Hung Wei /* Call with ovs_mutex */ 111611efd5cbSYi-Hung Wei static void ct_limit_set(const struct ovs_ct_limit_info *info, 111711efd5cbSYi-Hung Wei struct ovs_ct_limit *new_ct_limit) 111811efd5cbSYi-Hung Wei { 111911efd5cbSYi-Hung Wei struct ovs_ct_limit *ct_limit; 112011efd5cbSYi-Hung Wei struct hlist_head *head; 112111efd5cbSYi-Hung Wei 112211efd5cbSYi-Hung Wei head = ct_limit_hash_bucket(info, new_ct_limit->zone); 112311efd5cbSYi-Hung Wei hlist_for_each_entry_rcu(ct_limit, head, hlist_node) { 112411efd5cbSYi-Hung Wei if (ct_limit->zone == new_ct_limit->zone) { 112511efd5cbSYi-Hung Wei hlist_replace_rcu(&ct_limit->hlist_node, 112611efd5cbSYi-Hung Wei &new_ct_limit->hlist_node); 112711efd5cbSYi-Hung Wei kfree_rcu(ct_limit, rcu); 112811efd5cbSYi-Hung Wei return; 112911efd5cbSYi-Hung Wei } 113011efd5cbSYi-Hung Wei } 113111efd5cbSYi-Hung Wei 113211efd5cbSYi-Hung Wei hlist_add_head_rcu(&new_ct_limit->hlist_node, head); 113311efd5cbSYi-Hung Wei } 113411efd5cbSYi-Hung Wei 113511efd5cbSYi-Hung Wei /* Call with ovs_mutex */ 113611efd5cbSYi-Hung Wei static void ct_limit_del(const struct ovs_ct_limit_info *info, u16 zone) 113711efd5cbSYi-Hung Wei { 113811efd5cbSYi-Hung Wei struct ovs_ct_limit *ct_limit; 113911efd5cbSYi-Hung Wei struct hlist_head *head; 114011efd5cbSYi-Hung Wei struct hlist_node *n; 114111efd5cbSYi-Hung Wei 114211efd5cbSYi-Hung Wei head = ct_limit_hash_bucket(info, zone); 114311efd5cbSYi-Hung Wei hlist_for_each_entry_safe(ct_limit, n, head, hlist_node) { 114411efd5cbSYi-Hung Wei if (ct_limit->zone == zone) { 114511efd5cbSYi-Hung Wei hlist_del_rcu(&ct_limit->hlist_node); 114611efd5cbSYi-Hung Wei kfree_rcu(ct_limit, rcu); 114711efd5cbSYi-Hung Wei return; 114811efd5cbSYi-Hung Wei } 114911efd5cbSYi-Hung Wei } 115011efd5cbSYi-Hung Wei } 115111efd5cbSYi-Hung Wei 115211efd5cbSYi-Hung Wei /* Call with RCU read lock */ 115311efd5cbSYi-Hung Wei static u32 ct_limit_get(const struct ovs_ct_limit_info *info, u16 zone) 115411efd5cbSYi-Hung Wei { 115511efd5cbSYi-Hung Wei struct ovs_ct_limit *ct_limit; 115611efd5cbSYi-Hung Wei struct hlist_head *head; 115711efd5cbSYi-Hung Wei 115811efd5cbSYi-Hung Wei head = ct_limit_hash_bucket(info, zone); 115911efd5cbSYi-Hung Wei hlist_for_each_entry_rcu(ct_limit, head, hlist_node) { 116011efd5cbSYi-Hung Wei if (ct_limit->zone == zone) 116111efd5cbSYi-Hung Wei return ct_limit->limit; 116211efd5cbSYi-Hung Wei } 116311efd5cbSYi-Hung Wei 116411efd5cbSYi-Hung Wei return info->default_limit; 116511efd5cbSYi-Hung Wei } 116611efd5cbSYi-Hung Wei 116711efd5cbSYi-Hung Wei static int ovs_ct_check_limit(struct net *net, 116811efd5cbSYi-Hung Wei const struct ovs_conntrack_info *info, 116911efd5cbSYi-Hung Wei const struct nf_conntrack_tuple *tuple) 117011efd5cbSYi-Hung Wei { 117111efd5cbSYi-Hung Wei struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 117211efd5cbSYi-Hung Wei const struct ovs_ct_limit_info *ct_limit_info = ovs_net->ct_limit_info; 117311efd5cbSYi-Hung Wei u32 per_zone_limit, connections; 117411efd5cbSYi-Hung Wei u32 conncount_key; 117511efd5cbSYi-Hung Wei 117611efd5cbSYi-Hung Wei conncount_key = info->zone.id; 117711efd5cbSYi-Hung Wei 117811efd5cbSYi-Hung Wei per_zone_limit = ct_limit_get(ct_limit_info, info->zone.id); 117911efd5cbSYi-Hung Wei if (per_zone_limit == OVS_CT_LIMIT_UNLIMITED) 118011efd5cbSYi-Hung Wei return 0; 118111efd5cbSYi-Hung Wei 118211efd5cbSYi-Hung Wei connections = nf_conncount_count(net, ct_limit_info->data, 118311efd5cbSYi-Hung Wei &conncount_key, tuple, &info->zone); 118411efd5cbSYi-Hung Wei if (connections > per_zone_limit) 118511efd5cbSYi-Hung Wei return -ENOMEM; 118611efd5cbSYi-Hung Wei 118711efd5cbSYi-Hung Wei return 0; 118811efd5cbSYi-Hung Wei } 118911efd5cbSYi-Hung Wei #endif 119011efd5cbSYi-Hung Wei 11917d904c7bSJarno Rajahalme /* Lookup connection and confirm if unconfirmed. */ 11927d904c7bSJarno Rajahalme static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, 11937d904c7bSJarno Rajahalme const struct ovs_conntrack_info *info, 11947d904c7bSJarno Rajahalme struct sk_buff *skb) 11957d904c7bSJarno Rajahalme { 11966ffcea79SJarno Rajahalme enum ip_conntrack_info ctinfo; 11976ffcea79SJarno Rajahalme struct nf_conn *ct; 11987d904c7bSJarno Rajahalme int err; 11997d904c7bSJarno Rajahalme 12007d904c7bSJarno Rajahalme err = __ovs_ct_lookup(net, key, info, skb); 12017d904c7bSJarno Rajahalme if (err) 12027d904c7bSJarno Rajahalme return err; 12037d904c7bSJarno Rajahalme 12046ffcea79SJarno Rajahalme /* The connection could be invalid, in which case this is a no-op.*/ 12056ffcea79SJarno Rajahalme ct = nf_ct_get(skb, &ctinfo); 12066ffcea79SJarno Rajahalme if (!ct) 12076ffcea79SJarno Rajahalme return 0; 12086ffcea79SJarno Rajahalme 120911efd5cbSYi-Hung Wei #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) 121011efd5cbSYi-Hung Wei if (static_branch_unlikely(&ovs_ct_limit_enabled)) { 121111efd5cbSYi-Hung Wei if (!nf_ct_is_confirmed(ct)) { 121211efd5cbSYi-Hung Wei err = ovs_ct_check_limit(net, info, 121311efd5cbSYi-Hung Wei &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); 121411efd5cbSYi-Hung Wei if (err) { 121511efd5cbSYi-Hung Wei net_warn_ratelimited("openvswitch: zone: %u " 121643d0e960SColin Ian King "exceeds conntrack limit\n", 121711efd5cbSYi-Hung Wei info->zone.id); 121811efd5cbSYi-Hung Wei return err; 121911efd5cbSYi-Hung Wei } 122011efd5cbSYi-Hung Wei } 122111efd5cbSYi-Hung Wei } 122211efd5cbSYi-Hung Wei #endif 122311efd5cbSYi-Hung Wei 122412064551SJarno Rajahalme /* Set the conntrack event mask if given. NEW and DELETE events have 122512064551SJarno Rajahalme * their own groups, but the NFNLGRP_CONNTRACK_UPDATE group listener 122612064551SJarno Rajahalme * typically would receive many kinds of updates. Setting the event 122712064551SJarno Rajahalme * mask allows those events to be filtered. The set event mask will 122812064551SJarno Rajahalme * remain in effect for the lifetime of the connection unless changed 122912064551SJarno Rajahalme * by a further CT action with both the commit flag and the eventmask 123012064551SJarno Rajahalme * option. */ 123112064551SJarno Rajahalme if (info->have_eventmask) { 123212064551SJarno Rajahalme struct nf_conntrack_ecache *cache = nf_ct_ecache_find(ct); 123312064551SJarno Rajahalme 123412064551SJarno Rajahalme if (cache) 123512064551SJarno Rajahalme cache->ctmask = info->eventmask; 123612064551SJarno Rajahalme } 123712064551SJarno Rajahalme 12387d904c7bSJarno Rajahalme /* Apply changes before confirming the connection so that the initial 12397d904c7bSJarno Rajahalme * conntrack NEW netlink event carries the values given in the CT 12407d904c7bSJarno Rajahalme * action. 12417d904c7bSJarno Rajahalme */ 12427d904c7bSJarno Rajahalme if (info->mark.mask) { 12436ffcea79SJarno Rajahalme err = ovs_ct_set_mark(ct, key, info->mark.value, 12447d904c7bSJarno Rajahalme info->mark.mask); 12457d904c7bSJarno Rajahalme if (err) 12467d904c7bSJarno Rajahalme return err; 12477d904c7bSJarno Rajahalme } 124809aa98adSJarno Rajahalme if (!nf_ct_is_confirmed(ct)) { 12496ffcea79SJarno Rajahalme err = ovs_ct_init_labels(ct, key, &info->labels.value, 12506ffcea79SJarno Rajahalme &info->labels.mask); 125109aa98adSJarno Rajahalme if (err) 125209aa98adSJarno Rajahalme return err; 1253b702436aSPaul Blakey 1254b702436aSPaul Blakey nf_conn_act_ct_ext_add(ct); 1255a277d516SArnd Bergmann } else if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && 1256a277d516SArnd Bergmann labels_nonzero(&info->labels.mask)) { 12576ffcea79SJarno Rajahalme err = ovs_ct_set_labels(ct, key, &info->labels.value, 12587d904c7bSJarno Rajahalme &info->labels.mask); 12597d904c7bSJarno Rajahalme if (err) 12607d904c7bSJarno Rajahalme return err; 12617d904c7bSJarno Rajahalme } 12627d904c7bSJarno Rajahalme /* This will take care of sending queued events even if the connection 12637d904c7bSJarno Rajahalme * is already confirmed. 12647d904c7bSJarno Rajahalme */ 12657d904c7bSJarno Rajahalme if (nf_conntrack_confirm(skb) != NF_ACCEPT) 12667d904c7bSJarno Rajahalme return -EINVAL; 12677d904c7bSJarno Rajahalme 12687d904c7bSJarno Rajahalme return 0; 12697d904c7bSJarno Rajahalme } 12707d904c7bSJarno Rajahalme 12719382fe71SEd Swierk /* Trim the skb to the length specified by the IP/IPv6 header, 12729382fe71SEd Swierk * removing any trailing lower-layer padding. This prepares the skb 12739382fe71SEd Swierk * for higher-layer processing that assumes skb->len excludes padding 12749382fe71SEd Swierk * (such as nf_ip_checksum). The caller needs to pull the skb to the 12759382fe71SEd Swierk * network header, and ensure ip_hdr/ipv6_hdr points to valid data. 12769382fe71SEd Swierk */ 12779382fe71SEd Swierk static int ovs_skb_network_trim(struct sk_buff *skb) 12789382fe71SEd Swierk { 12799382fe71SEd Swierk unsigned int len; 12809382fe71SEd Swierk int err; 12819382fe71SEd Swierk 12829382fe71SEd Swierk switch (skb->protocol) { 12839382fe71SEd Swierk case htons(ETH_P_IP): 12849382fe71SEd Swierk len = ntohs(ip_hdr(skb)->tot_len); 12859382fe71SEd Swierk break; 12869382fe71SEd Swierk case htons(ETH_P_IPV6): 12879382fe71SEd Swierk len = sizeof(struct ipv6hdr) 12889382fe71SEd Swierk + ntohs(ipv6_hdr(skb)->payload_len); 12899382fe71SEd Swierk break; 12909382fe71SEd Swierk default: 12919382fe71SEd Swierk len = skb->len; 12929382fe71SEd Swierk } 12939382fe71SEd Swierk 12949382fe71SEd Swierk err = pskb_trim_rcsum(skb, len); 12959382fe71SEd Swierk if (err) 12969382fe71SEd Swierk kfree_skb(skb); 12979382fe71SEd Swierk 12989382fe71SEd Swierk return err; 12999382fe71SEd Swierk } 13009382fe71SEd Swierk 130174c16618SJoe Stringer /* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero 130274c16618SJoe Stringer * value if 'skb' is freed. 130374c16618SJoe Stringer */ 13047f8a436eSJoe Stringer int ovs_ct_execute(struct net *net, struct sk_buff *skb, 13057f8a436eSJoe Stringer struct sw_flow_key *key, 13067f8a436eSJoe Stringer const struct ovs_conntrack_info *info) 13077f8a436eSJoe Stringer { 13087f8a436eSJoe Stringer int nh_ofs; 13097f8a436eSJoe Stringer int err; 13107f8a436eSJoe Stringer 13117f8a436eSJoe Stringer /* The conntrack module expects to be working at L3. */ 13127f8a436eSJoe Stringer nh_ofs = skb_network_offset(skb); 131375f01a4cSLance Richardson skb_pull_rcsum(skb, nh_ofs); 13147f8a436eSJoe Stringer 13159382fe71SEd Swierk err = ovs_skb_network_trim(skb); 13169382fe71SEd Swierk if (err) 13179382fe71SEd Swierk return err; 13189382fe71SEd Swierk 13197f8a436eSJoe Stringer if (key->ip.frag != OVS_FRAG_TYPE_NONE) { 13207f8a436eSJoe Stringer err = handle_fragments(net, key, info->zone.id, skb); 13217f8a436eSJoe Stringer if (err) 13227f8a436eSJoe Stringer return err; 13237f8a436eSJoe Stringer } 13247f8a436eSJoe Stringer 1325ab38a7b5SJoe Stringer if (info->commit) 13267d904c7bSJarno Rajahalme err = ovs_ct_commit(net, key, info, skb); 13277f8a436eSJoe Stringer else 13287f8a436eSJoe Stringer err = ovs_ct_lookup(net, key, info, skb); 13297f8a436eSJoe Stringer 13307d42e84eSChristophe JAILLET skb_push_rcsum(skb, nh_ofs); 133174c16618SJoe Stringer if (err) 133274c16618SJoe Stringer kfree_skb(skb); 13337f8a436eSJoe Stringer return err; 13347f8a436eSJoe Stringer } 13357f8a436eSJoe Stringer 1336b8226962SEric Garver int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key) 1337b8226962SEric Garver { 1338408bdcfcSFlorian Westphal enum ip_conntrack_info ctinfo; 1339408bdcfcSFlorian Westphal struct nf_conn *ct; 1340408bdcfcSFlorian Westphal 1341408bdcfcSFlorian Westphal ct = nf_ct_get(skb, &ctinfo); 1342408bdcfcSFlorian Westphal 1343408bdcfcSFlorian Westphal nf_ct_put(ct); 1344b8226962SEric Garver nf_ct_set(skb, NULL, IP_CT_UNTRACKED); 13452061ecfdSIlya Maximets 13462061ecfdSIlya Maximets if (key) 1347d29334c1Swenxu ovs_ct_fill_key(skb, key, false); 1348b8226962SEric Garver 1349b8226962SEric Garver return 0; 1350b8226962SEric Garver } 1351b8226962SEric Garver 1352cae3a262SJoe Stringer static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name, 1353cae3a262SJoe Stringer const struct sw_flow_key *key, bool log) 1354cae3a262SJoe Stringer { 1355cae3a262SJoe Stringer struct nf_conntrack_helper *helper; 1356cae3a262SJoe Stringer struct nf_conn_help *help; 1357fec9c271SFlavio Leitner int ret = 0; 1358cae3a262SJoe Stringer 1359cae3a262SJoe Stringer helper = nf_conntrack_helper_try_module_get(name, info->family, 1360cae3a262SJoe Stringer key->ip.proto); 1361cae3a262SJoe Stringer if (!helper) { 1362cae3a262SJoe Stringer OVS_NLERR(log, "Unknown helper \"%s\"", name); 1363cae3a262SJoe Stringer return -EINVAL; 1364cae3a262SJoe Stringer } 1365cae3a262SJoe Stringer 1366440534d3SGao Feng help = nf_ct_helper_ext_add(info->ct, GFP_KERNEL); 1367cae3a262SJoe Stringer if (!help) { 1368d91fc59cSLiping Zhang nf_conntrack_helper_put(helper); 1369cae3a262SJoe Stringer return -ENOMEM; 1370cae3a262SJoe Stringer } 1371cae3a262SJoe Stringer 1372f319ca65SGeert Uytterhoeven #if IS_ENABLED(CONFIG_NF_NAT) 1373fec9c271SFlavio Leitner if (info->nat) { 1374fec9c271SFlavio Leitner ret = nf_nat_helper_try_module_get(name, info->family, 1375fec9c271SFlavio Leitner key->ip.proto); 1376fec9c271SFlavio Leitner if (ret) { 1377fec9c271SFlavio Leitner nf_conntrack_helper_put(helper); 1378fec9c271SFlavio Leitner OVS_NLERR(log, "Failed to load \"%s\" NAT helper, error: %d", 1379fec9c271SFlavio Leitner name, ret); 1380fec9c271SFlavio Leitner return ret; 1381fec9c271SFlavio Leitner } 1382fec9c271SFlavio Leitner } 1383fec9c271SFlavio Leitner #endif 1384cae3a262SJoe Stringer rcu_assign_pointer(help->helper, helper); 1385cae3a262SJoe Stringer info->helper = helper; 1386fec9c271SFlavio Leitner return ret; 1387cae3a262SJoe Stringer } 1388cae3a262SJoe Stringer 13894806e975SFlorian Westphal #if IS_ENABLED(CONFIG_NF_NAT) 139005752523SJarno Rajahalme static int parse_nat(const struct nlattr *attr, 139105752523SJarno Rajahalme struct ovs_conntrack_info *info, bool log) 139205752523SJarno Rajahalme { 139305752523SJarno Rajahalme struct nlattr *a; 139405752523SJarno Rajahalme int rem; 139505752523SJarno Rajahalme bool have_ip_max = false; 139605752523SJarno Rajahalme bool have_proto_max = false; 139705752523SJarno Rajahalme bool ip_vers = (info->family == NFPROTO_IPV6); 139805752523SJarno Rajahalme 139905752523SJarno Rajahalme nla_for_each_nested(a, attr, rem) { 140005752523SJarno Rajahalme static const int ovs_nat_attr_lens[OVS_NAT_ATTR_MAX + 1][2] = { 140105752523SJarno Rajahalme [OVS_NAT_ATTR_SRC] = {0, 0}, 140205752523SJarno Rajahalme [OVS_NAT_ATTR_DST] = {0, 0}, 140305752523SJarno Rajahalme [OVS_NAT_ATTR_IP_MIN] = {sizeof(struct in_addr), 140405752523SJarno Rajahalme sizeof(struct in6_addr)}, 140505752523SJarno Rajahalme [OVS_NAT_ATTR_IP_MAX] = {sizeof(struct in_addr), 140605752523SJarno Rajahalme sizeof(struct in6_addr)}, 140705752523SJarno Rajahalme [OVS_NAT_ATTR_PROTO_MIN] = {sizeof(u16), sizeof(u16)}, 140805752523SJarno Rajahalme [OVS_NAT_ATTR_PROTO_MAX] = {sizeof(u16), sizeof(u16)}, 140905752523SJarno Rajahalme [OVS_NAT_ATTR_PERSISTENT] = {0, 0}, 141005752523SJarno Rajahalme [OVS_NAT_ATTR_PROTO_HASH] = {0, 0}, 141105752523SJarno Rajahalme [OVS_NAT_ATTR_PROTO_RANDOM] = {0, 0}, 141205752523SJarno Rajahalme }; 141305752523SJarno Rajahalme int type = nla_type(a); 141405752523SJarno Rajahalme 141505752523SJarno Rajahalme if (type > OVS_NAT_ATTR_MAX) { 14160ed80da5SJoe Perches OVS_NLERR(log, "Unknown NAT attribute (type=%d, max=%d)", 141705752523SJarno Rajahalme type, OVS_NAT_ATTR_MAX); 141805752523SJarno Rajahalme return -EINVAL; 141905752523SJarno Rajahalme } 142005752523SJarno Rajahalme 142105752523SJarno Rajahalme if (nla_len(a) != ovs_nat_attr_lens[type][ip_vers]) { 14220ed80da5SJoe Perches OVS_NLERR(log, "NAT attribute type %d has unexpected length (%d != %d)", 142305752523SJarno Rajahalme type, nla_len(a), 142405752523SJarno Rajahalme ovs_nat_attr_lens[type][ip_vers]); 142505752523SJarno Rajahalme return -EINVAL; 142605752523SJarno Rajahalme } 142705752523SJarno Rajahalme 142805752523SJarno Rajahalme switch (type) { 142905752523SJarno Rajahalme case OVS_NAT_ATTR_SRC: 143005752523SJarno Rajahalme case OVS_NAT_ATTR_DST: 143105752523SJarno Rajahalme if (info->nat) { 14320ed80da5SJoe Perches OVS_NLERR(log, "Only one type of NAT may be specified"); 143305752523SJarno Rajahalme return -ERANGE; 143405752523SJarno Rajahalme } 143505752523SJarno Rajahalme info->nat |= OVS_CT_NAT; 143605752523SJarno Rajahalme info->nat |= ((type == OVS_NAT_ATTR_SRC) 143705752523SJarno Rajahalme ? OVS_CT_SRC_NAT : OVS_CT_DST_NAT); 143805752523SJarno Rajahalme break; 143905752523SJarno Rajahalme 144005752523SJarno Rajahalme case OVS_NAT_ATTR_IP_MIN: 1441ac71b46eSHaishuang Yan nla_memcpy(&info->range.min_addr, a, 1442ac71b46eSHaishuang Yan sizeof(info->range.min_addr)); 144305752523SJarno Rajahalme info->range.flags |= NF_NAT_RANGE_MAP_IPS; 144405752523SJarno Rajahalme break; 144505752523SJarno Rajahalme 144605752523SJarno Rajahalme case OVS_NAT_ATTR_IP_MAX: 144705752523SJarno Rajahalme have_ip_max = true; 144805752523SJarno Rajahalme nla_memcpy(&info->range.max_addr, a, 144905752523SJarno Rajahalme sizeof(info->range.max_addr)); 145005752523SJarno Rajahalme info->range.flags |= NF_NAT_RANGE_MAP_IPS; 145105752523SJarno Rajahalme break; 145205752523SJarno Rajahalme 145305752523SJarno Rajahalme case OVS_NAT_ATTR_PROTO_MIN: 145405752523SJarno Rajahalme info->range.min_proto.all = htons(nla_get_u16(a)); 145505752523SJarno Rajahalme info->range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; 145605752523SJarno Rajahalme break; 145705752523SJarno Rajahalme 145805752523SJarno Rajahalme case OVS_NAT_ATTR_PROTO_MAX: 145905752523SJarno Rajahalme have_proto_max = true; 146005752523SJarno Rajahalme info->range.max_proto.all = htons(nla_get_u16(a)); 146105752523SJarno Rajahalme info->range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; 146205752523SJarno Rajahalme break; 146305752523SJarno Rajahalme 146405752523SJarno Rajahalme case OVS_NAT_ATTR_PERSISTENT: 146505752523SJarno Rajahalme info->range.flags |= NF_NAT_RANGE_PERSISTENT; 146605752523SJarno Rajahalme break; 146705752523SJarno Rajahalme 146805752523SJarno Rajahalme case OVS_NAT_ATTR_PROTO_HASH: 146905752523SJarno Rajahalme info->range.flags |= NF_NAT_RANGE_PROTO_RANDOM; 147005752523SJarno Rajahalme break; 147105752523SJarno Rajahalme 147205752523SJarno Rajahalme case OVS_NAT_ATTR_PROTO_RANDOM: 147305752523SJarno Rajahalme info->range.flags |= NF_NAT_RANGE_PROTO_RANDOM_FULLY; 147405752523SJarno Rajahalme break; 147505752523SJarno Rajahalme 147605752523SJarno Rajahalme default: 14770ed80da5SJoe Perches OVS_NLERR(log, "Unknown nat attribute (%d)", type); 147805752523SJarno Rajahalme return -EINVAL; 147905752523SJarno Rajahalme } 148005752523SJarno Rajahalme } 148105752523SJarno Rajahalme 148205752523SJarno Rajahalme if (rem > 0) { 14830ed80da5SJoe Perches OVS_NLERR(log, "NAT attribute has %d unknown bytes", rem); 148405752523SJarno Rajahalme return -EINVAL; 148505752523SJarno Rajahalme } 148605752523SJarno Rajahalme if (!info->nat) { 148705752523SJarno Rajahalme /* Do not allow flags if no type is given. */ 148805752523SJarno Rajahalme if (info->range.flags) { 148905752523SJarno Rajahalme OVS_NLERR(log, 1490e0b10844SJulia Lawall "NAT flags may be given only when NAT range (SRC or DST) is also specified." 149105752523SJarno Rajahalme ); 149205752523SJarno Rajahalme return -EINVAL; 149305752523SJarno Rajahalme } 149405752523SJarno Rajahalme info->nat = OVS_CT_NAT; /* NAT existing connections. */ 149505752523SJarno Rajahalme } else if (!info->commit) { 149605752523SJarno Rajahalme OVS_NLERR(log, 1497e0b10844SJulia Lawall "NAT attributes may be specified only when CT COMMIT flag is also specified." 149805752523SJarno Rajahalme ); 149905752523SJarno Rajahalme return -EINVAL; 150005752523SJarno Rajahalme } 150105752523SJarno Rajahalme /* Allow missing IP_MAX. */ 150205752523SJarno Rajahalme if (info->range.flags & NF_NAT_RANGE_MAP_IPS && !have_ip_max) { 150305752523SJarno Rajahalme memcpy(&info->range.max_addr, &info->range.min_addr, 150405752523SJarno Rajahalme sizeof(info->range.max_addr)); 150505752523SJarno Rajahalme } 150605752523SJarno Rajahalme /* Allow missing PROTO_MAX. */ 150705752523SJarno Rajahalme if (info->range.flags & NF_NAT_RANGE_PROTO_SPECIFIED && 150805752523SJarno Rajahalme !have_proto_max) { 150905752523SJarno Rajahalme info->range.max_proto.all = info->range.min_proto.all; 151005752523SJarno Rajahalme } 151105752523SJarno Rajahalme return 0; 151205752523SJarno Rajahalme } 151305752523SJarno Rajahalme #endif 151405752523SJarno Rajahalme 15157f8a436eSJoe Stringer static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = { 1516ab38a7b5SJoe Stringer [OVS_CT_ATTR_COMMIT] = { .minlen = 0, .maxlen = 0 }, 1517dd41d33fSJarno Rajahalme [OVS_CT_ATTR_FORCE_COMMIT] = { .minlen = 0, .maxlen = 0 }, 15187f8a436eSJoe Stringer [OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16), 15197f8a436eSJoe Stringer .maxlen = sizeof(u16) }, 1520182e3042SJoe Stringer [OVS_CT_ATTR_MARK] = { .minlen = sizeof(struct md_mark), 1521182e3042SJoe Stringer .maxlen = sizeof(struct md_mark) }, 152233db4125SJoe Stringer [OVS_CT_ATTR_LABELS] = { .minlen = sizeof(struct md_labels), 152333db4125SJoe Stringer .maxlen = sizeof(struct md_labels) }, 1524cae3a262SJoe Stringer [OVS_CT_ATTR_HELPER] = { .minlen = 1, 152505752523SJarno Rajahalme .maxlen = NF_CT_HELPER_NAME_LEN }, 15264806e975SFlorian Westphal #if IS_ENABLED(CONFIG_NF_NAT) 152705752523SJarno Rajahalme /* NAT length is checked when parsing the nested attributes. */ 152805752523SJarno Rajahalme [OVS_CT_ATTR_NAT] = { .minlen = 0, .maxlen = INT_MAX }, 152905752523SJarno Rajahalme #endif 153012064551SJarno Rajahalme [OVS_CT_ATTR_EVENTMASK] = { .minlen = sizeof(u32), 153112064551SJarno Rajahalme .maxlen = sizeof(u32) }, 153206bd2bdfSYi-Hung Wei [OVS_CT_ATTR_TIMEOUT] = { .minlen = 1, 153306bd2bdfSYi-Hung Wei .maxlen = CTNL_TIMEOUT_NAME_MAX }, 15347f8a436eSJoe Stringer }; 15357f8a436eSJoe Stringer 15367f8a436eSJoe Stringer static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, 1537cae3a262SJoe Stringer const char **helper, bool log) 15387f8a436eSJoe Stringer { 15397f8a436eSJoe Stringer struct nlattr *a; 15407f8a436eSJoe Stringer int rem; 15417f8a436eSJoe Stringer 15427f8a436eSJoe Stringer nla_for_each_nested(a, attr, rem) { 15437f8a436eSJoe Stringer int type = nla_type(a); 154469ec932eSLiping Zhang int maxlen; 154569ec932eSLiping Zhang int minlen; 15467f8a436eSJoe Stringer 15477f8a436eSJoe Stringer if (type > OVS_CT_ATTR_MAX) { 15487f8a436eSJoe Stringer OVS_NLERR(log, 15497f8a436eSJoe Stringer "Unknown conntrack attr (type=%d, max=%d)", 15507f8a436eSJoe Stringer type, OVS_CT_ATTR_MAX); 15517f8a436eSJoe Stringer return -EINVAL; 15527f8a436eSJoe Stringer } 155369ec932eSLiping Zhang 155469ec932eSLiping Zhang maxlen = ovs_ct_attr_lens[type].maxlen; 155569ec932eSLiping Zhang minlen = ovs_ct_attr_lens[type].minlen; 15567f8a436eSJoe Stringer if (nla_len(a) < minlen || nla_len(a) > maxlen) { 15577f8a436eSJoe Stringer OVS_NLERR(log, 15587f8a436eSJoe Stringer "Conntrack attr type has unexpected length (type=%d, length=%d, expected=%d)", 15597f8a436eSJoe Stringer type, nla_len(a), maxlen); 15607f8a436eSJoe Stringer return -EINVAL; 15617f8a436eSJoe Stringer } 15627f8a436eSJoe Stringer 15637f8a436eSJoe Stringer switch (type) { 1564dd41d33fSJarno Rajahalme case OVS_CT_ATTR_FORCE_COMMIT: 1565dd41d33fSJarno Rajahalme info->force = true; 1566df561f66SGustavo A. R. Silva fallthrough; 1567ab38a7b5SJoe Stringer case OVS_CT_ATTR_COMMIT: 1568ab38a7b5SJoe Stringer info->commit = true; 15697f8a436eSJoe Stringer break; 15707f8a436eSJoe Stringer #ifdef CONFIG_NF_CONNTRACK_ZONES 15717f8a436eSJoe Stringer case OVS_CT_ATTR_ZONE: 15727f8a436eSJoe Stringer info->zone.id = nla_get_u16(a); 15737f8a436eSJoe Stringer break; 15747f8a436eSJoe Stringer #endif 1575182e3042SJoe Stringer #ifdef CONFIG_NF_CONNTRACK_MARK 1576182e3042SJoe Stringer case OVS_CT_ATTR_MARK: { 1577182e3042SJoe Stringer struct md_mark *mark = nla_data(a); 1578182e3042SJoe Stringer 1579e754ec69SJoe Stringer if (!mark->mask) { 1580e754ec69SJoe Stringer OVS_NLERR(log, "ct_mark mask cannot be 0"); 1581e754ec69SJoe Stringer return -EINVAL; 1582e754ec69SJoe Stringer } 1583182e3042SJoe Stringer info->mark = *mark; 1584182e3042SJoe Stringer break; 1585182e3042SJoe Stringer } 1586182e3042SJoe Stringer #endif 1587c2ac6673SJoe Stringer #ifdef CONFIG_NF_CONNTRACK_LABELS 158833db4125SJoe Stringer case OVS_CT_ATTR_LABELS: { 158933db4125SJoe Stringer struct md_labels *labels = nla_data(a); 1590c2ac6673SJoe Stringer 1591e754ec69SJoe Stringer if (!labels_nonzero(&labels->mask)) { 1592e754ec69SJoe Stringer OVS_NLERR(log, "ct_labels mask cannot be 0"); 1593e754ec69SJoe Stringer return -EINVAL; 1594e754ec69SJoe Stringer } 159533db4125SJoe Stringer info->labels = *labels; 1596c2ac6673SJoe Stringer break; 1597c2ac6673SJoe Stringer } 1598c2ac6673SJoe Stringer #endif 1599cae3a262SJoe Stringer case OVS_CT_ATTR_HELPER: 1600cae3a262SJoe Stringer *helper = nla_data(a); 1601cae3a262SJoe Stringer if (!memchr(*helper, '\0', nla_len(a))) { 1602cae3a262SJoe Stringer OVS_NLERR(log, "Invalid conntrack helper"); 1603cae3a262SJoe Stringer return -EINVAL; 1604cae3a262SJoe Stringer } 1605cae3a262SJoe Stringer break; 16064806e975SFlorian Westphal #if IS_ENABLED(CONFIG_NF_NAT) 160705752523SJarno Rajahalme case OVS_CT_ATTR_NAT: { 160805752523SJarno Rajahalme int err = parse_nat(a, info, log); 160905752523SJarno Rajahalme 161005752523SJarno Rajahalme if (err) 161105752523SJarno Rajahalme return err; 161205752523SJarno Rajahalme break; 161305752523SJarno Rajahalme } 161405752523SJarno Rajahalme #endif 161512064551SJarno Rajahalme case OVS_CT_ATTR_EVENTMASK: 161612064551SJarno Rajahalme info->have_eventmask = true; 161712064551SJarno Rajahalme info->eventmask = nla_get_u32(a); 161812064551SJarno Rajahalme break; 161906bd2bdfSYi-Hung Wei #ifdef CONFIG_NF_CONNTRACK_TIMEOUT 162006bd2bdfSYi-Hung Wei case OVS_CT_ATTR_TIMEOUT: 162106bd2bdfSYi-Hung Wei memcpy(info->timeout, nla_data(a), nla_len(a)); 162206bd2bdfSYi-Hung Wei if (!memchr(info->timeout, '\0', nla_len(a))) { 162312c6bc38SYi-Hung Wei OVS_NLERR(log, "Invalid conntrack timeout"); 162406bd2bdfSYi-Hung Wei return -EINVAL; 162506bd2bdfSYi-Hung Wei } 162606bd2bdfSYi-Hung Wei break; 162706bd2bdfSYi-Hung Wei #endif 162812064551SJarno Rajahalme 16297f8a436eSJoe Stringer default: 16307f8a436eSJoe Stringer OVS_NLERR(log, "Unknown conntrack attr (%d)", 16317f8a436eSJoe Stringer type); 16327f8a436eSJoe Stringer return -EINVAL; 16337f8a436eSJoe Stringer } 16347f8a436eSJoe Stringer } 16357f8a436eSJoe Stringer 16367d904c7bSJarno Rajahalme #ifdef CONFIG_NF_CONNTRACK_MARK 16377d904c7bSJarno Rajahalme if (!info->commit && info->mark.mask) { 16387d904c7bSJarno Rajahalme OVS_NLERR(log, 16397d904c7bSJarno Rajahalme "Setting conntrack mark requires 'commit' flag."); 16407d904c7bSJarno Rajahalme return -EINVAL; 16417d904c7bSJarno Rajahalme } 16427d904c7bSJarno Rajahalme #endif 16437d904c7bSJarno Rajahalme #ifdef CONFIG_NF_CONNTRACK_LABELS 16447d904c7bSJarno Rajahalme if (!info->commit && labels_nonzero(&info->labels.mask)) { 16457d904c7bSJarno Rajahalme OVS_NLERR(log, 16467d904c7bSJarno Rajahalme "Setting conntrack labels requires 'commit' flag."); 16477d904c7bSJarno Rajahalme return -EINVAL; 16487d904c7bSJarno Rajahalme } 16497d904c7bSJarno Rajahalme #endif 16507f8a436eSJoe Stringer if (rem > 0) { 16517f8a436eSJoe Stringer OVS_NLERR(log, "Conntrack attr has %d unknown bytes", rem); 16527f8a436eSJoe Stringer return -EINVAL; 16537f8a436eSJoe Stringer } 16547f8a436eSJoe Stringer 16557f8a436eSJoe Stringer return 0; 16567f8a436eSJoe Stringer } 16577f8a436eSJoe Stringer 1658c2ac6673SJoe Stringer bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr) 16597f8a436eSJoe Stringer { 16607f8a436eSJoe Stringer if (attr == OVS_KEY_ATTR_CT_STATE) 16617f8a436eSJoe Stringer return true; 16627f8a436eSJoe Stringer if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && 16637f8a436eSJoe Stringer attr == OVS_KEY_ATTR_CT_ZONE) 16647f8a436eSJoe Stringer return true; 1665182e3042SJoe Stringer if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && 1666182e3042SJoe Stringer attr == OVS_KEY_ATTR_CT_MARK) 1667182e3042SJoe Stringer return true; 1668c2ac6673SJoe Stringer if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && 166933db4125SJoe Stringer attr == OVS_KEY_ATTR_CT_LABELS) { 1670c2ac6673SJoe Stringer struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 1671c2ac6673SJoe Stringer 1672c2ac6673SJoe Stringer return ovs_net->xt_label; 1673c2ac6673SJoe Stringer } 16747f8a436eSJoe Stringer 16757f8a436eSJoe Stringer return false; 16767f8a436eSJoe Stringer } 16777f8a436eSJoe Stringer 16787f8a436eSJoe Stringer int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, 16797f8a436eSJoe Stringer const struct sw_flow_key *key, 16807f8a436eSJoe Stringer struct sw_flow_actions **sfa, bool log) 16817f8a436eSJoe Stringer { 16827f8a436eSJoe Stringer struct ovs_conntrack_info ct_info; 1683cae3a262SJoe Stringer const char *helper = NULL; 16847f8a436eSJoe Stringer u16 family; 16857f8a436eSJoe Stringer int err; 16867f8a436eSJoe Stringer 16877f8a436eSJoe Stringer family = key_to_nfproto(key); 16887f8a436eSJoe Stringer if (family == NFPROTO_UNSPEC) { 16897f8a436eSJoe Stringer OVS_NLERR(log, "ct family unspecified"); 16907f8a436eSJoe Stringer return -EINVAL; 16917f8a436eSJoe Stringer } 16927f8a436eSJoe Stringer 16937f8a436eSJoe Stringer memset(&ct_info, 0, sizeof(ct_info)); 16947f8a436eSJoe Stringer ct_info.family = family; 16957f8a436eSJoe Stringer 16967f8a436eSJoe Stringer nf_ct_zone_init(&ct_info.zone, NF_CT_DEFAULT_ZONE_ID, 16977f8a436eSJoe Stringer NF_CT_DEFAULT_ZONE_DIR, 0); 16987f8a436eSJoe Stringer 1699cae3a262SJoe Stringer err = parse_ct(attr, &ct_info, &helper, log); 17007f8a436eSJoe Stringer if (err) 17017f8a436eSJoe Stringer return err; 17027f8a436eSJoe Stringer 17037f8a436eSJoe Stringer /* Set up template for tracking connections in specific zones. */ 17047f8a436eSJoe Stringer ct_info.ct = nf_ct_tmpl_alloc(net, &ct_info.zone, GFP_KERNEL); 17057f8a436eSJoe Stringer if (!ct_info.ct) { 17067f8a436eSJoe Stringer OVS_NLERR(log, "Failed to allocate conntrack template"); 17077f8a436eSJoe Stringer return -ENOMEM; 17087f8a436eSJoe Stringer } 170906bd2bdfSYi-Hung Wei 171006bd2bdfSYi-Hung Wei if (ct_info.timeout[0]) { 171106bd2bdfSYi-Hung Wei if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto, 171206bd2bdfSYi-Hung Wei ct_info.timeout)) 171306bd2bdfSYi-Hung Wei pr_info_ratelimited("Failed to associated timeout " 171406bd2bdfSYi-Hung Wei "policy `%s'\n", ct_info.timeout); 171571778951SYi-Hung Wei else 171671778951SYi-Hung Wei ct_info.nf_ct_timeout = rcu_dereference( 171771778951SYi-Hung Wei nf_ct_timeout_find(ct_info.ct)->timeout); 171871778951SYi-Hung Wei 171906bd2bdfSYi-Hung Wei } 172006bd2bdfSYi-Hung Wei 1721cae3a262SJoe Stringer if (helper) { 1722cae3a262SJoe Stringer err = ovs_ct_add_helper(&ct_info, helper, key, log); 1723cae3a262SJoe Stringer if (err) 1724cae3a262SJoe Stringer goto err_free_ct; 1725cae3a262SJoe Stringer } 17267f8a436eSJoe Stringer 17277f8a436eSJoe Stringer err = ovs_nla_add_action(sfa, OVS_ACTION_ATTR_CT, &ct_info, 17287f8a436eSJoe Stringer sizeof(ct_info), log); 17297f8a436eSJoe Stringer if (err) 17307f8a436eSJoe Stringer goto err_free_ct; 17317f8a436eSJoe Stringer 17327f6d6558SFlavio Leitner __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status); 17337f8a436eSJoe Stringer return 0; 17347f8a436eSJoe Stringer err_free_ct: 17352f3ab9f9SJoe Stringer __ovs_ct_free_action(&ct_info); 17367f8a436eSJoe Stringer return err; 17377f8a436eSJoe Stringer } 17387f8a436eSJoe Stringer 17394806e975SFlorian Westphal #if IS_ENABLED(CONFIG_NF_NAT) 174005752523SJarno Rajahalme static bool ovs_ct_nat_to_attr(const struct ovs_conntrack_info *info, 174105752523SJarno Rajahalme struct sk_buff *skb) 174205752523SJarno Rajahalme { 174305752523SJarno Rajahalme struct nlattr *start; 174405752523SJarno Rajahalme 1745ae0be8deSMichal Kubecek start = nla_nest_start_noflag(skb, OVS_CT_ATTR_NAT); 174605752523SJarno Rajahalme if (!start) 174705752523SJarno Rajahalme return false; 174805752523SJarno Rajahalme 174905752523SJarno Rajahalme if (info->nat & OVS_CT_SRC_NAT) { 175005752523SJarno Rajahalme if (nla_put_flag(skb, OVS_NAT_ATTR_SRC)) 175105752523SJarno Rajahalme return false; 175205752523SJarno Rajahalme } else if (info->nat & OVS_CT_DST_NAT) { 175305752523SJarno Rajahalme if (nla_put_flag(skb, OVS_NAT_ATTR_DST)) 175405752523SJarno Rajahalme return false; 175505752523SJarno Rajahalme } else { 175605752523SJarno Rajahalme goto out; 175705752523SJarno Rajahalme } 175805752523SJarno Rajahalme 175905752523SJarno Rajahalme if (info->range.flags & NF_NAT_RANGE_MAP_IPS) { 17603bf195aeSFlorian Westphal if (IS_ENABLED(CONFIG_NF_NAT) && 176199b7248eSArnd Bergmann info->family == NFPROTO_IPV4) { 176205752523SJarno Rajahalme if (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MIN, 176305752523SJarno Rajahalme info->range.min_addr.ip) || 176405752523SJarno Rajahalme (info->range.max_addr.ip 176505752523SJarno Rajahalme != info->range.min_addr.ip && 176605752523SJarno Rajahalme (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MAX, 176705752523SJarno Rajahalme info->range.max_addr.ip)))) 176805752523SJarno Rajahalme return false; 17693bf195aeSFlorian Westphal } else if (IS_ENABLED(CONFIG_IPV6) && 177099b7248eSArnd Bergmann info->family == NFPROTO_IPV6) { 177105752523SJarno Rajahalme if (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MIN, 177205752523SJarno Rajahalme &info->range.min_addr.in6) || 177305752523SJarno Rajahalme (memcmp(&info->range.max_addr.in6, 177405752523SJarno Rajahalme &info->range.min_addr.in6, 177505752523SJarno Rajahalme sizeof(info->range.max_addr.in6)) && 177605752523SJarno Rajahalme (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MAX, 177705752523SJarno Rajahalme &info->range.max_addr.in6)))) 177805752523SJarno Rajahalme return false; 177905752523SJarno Rajahalme } else { 178005752523SJarno Rajahalme return false; 178105752523SJarno Rajahalme } 178205752523SJarno Rajahalme } 178305752523SJarno Rajahalme if (info->range.flags & NF_NAT_RANGE_PROTO_SPECIFIED && 178405752523SJarno Rajahalme (nla_put_u16(skb, OVS_NAT_ATTR_PROTO_MIN, 178505752523SJarno Rajahalme ntohs(info->range.min_proto.all)) || 178605752523SJarno Rajahalme (info->range.max_proto.all != info->range.min_proto.all && 178705752523SJarno Rajahalme nla_put_u16(skb, OVS_NAT_ATTR_PROTO_MAX, 178805752523SJarno Rajahalme ntohs(info->range.max_proto.all))))) 178905752523SJarno Rajahalme return false; 179005752523SJarno Rajahalme 179105752523SJarno Rajahalme if (info->range.flags & NF_NAT_RANGE_PERSISTENT && 179205752523SJarno Rajahalme nla_put_flag(skb, OVS_NAT_ATTR_PERSISTENT)) 179305752523SJarno Rajahalme return false; 179405752523SJarno Rajahalme if (info->range.flags & NF_NAT_RANGE_PROTO_RANDOM && 179505752523SJarno Rajahalme nla_put_flag(skb, OVS_NAT_ATTR_PROTO_HASH)) 179605752523SJarno Rajahalme return false; 179705752523SJarno Rajahalme if (info->range.flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY && 179805752523SJarno Rajahalme nla_put_flag(skb, OVS_NAT_ATTR_PROTO_RANDOM)) 179905752523SJarno Rajahalme return false; 180005752523SJarno Rajahalme out: 180105752523SJarno Rajahalme nla_nest_end(skb, start); 180205752523SJarno Rajahalme 180305752523SJarno Rajahalme return true; 180405752523SJarno Rajahalme } 180505752523SJarno Rajahalme #endif 180605752523SJarno Rajahalme 18077f8a436eSJoe Stringer int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, 18087f8a436eSJoe Stringer struct sk_buff *skb) 18097f8a436eSJoe Stringer { 18107f8a436eSJoe Stringer struct nlattr *start; 18117f8a436eSJoe Stringer 1812ae0be8deSMichal Kubecek start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_CT); 18137f8a436eSJoe Stringer if (!start) 18147f8a436eSJoe Stringer return -EMSGSIZE; 18157f8a436eSJoe Stringer 1816dd41d33fSJarno Rajahalme if (ct_info->commit && nla_put_flag(skb, ct_info->force 1817dd41d33fSJarno Rajahalme ? OVS_CT_ATTR_FORCE_COMMIT 1818dd41d33fSJarno Rajahalme : OVS_CT_ATTR_COMMIT)) 18197f8a436eSJoe Stringer return -EMSGSIZE; 18207f8a436eSJoe Stringer if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && 18217f8a436eSJoe Stringer nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id)) 18227f8a436eSJoe Stringer return -EMSGSIZE; 1823e754ec69SJoe Stringer if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && ct_info->mark.mask && 1824182e3042SJoe Stringer nla_put(skb, OVS_CT_ATTR_MARK, sizeof(ct_info->mark), 1825182e3042SJoe Stringer &ct_info->mark)) 1826182e3042SJoe Stringer return -EMSGSIZE; 1827c2ac6673SJoe Stringer if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && 1828e754ec69SJoe Stringer labels_nonzero(&ct_info->labels.mask) && 182933db4125SJoe Stringer nla_put(skb, OVS_CT_ATTR_LABELS, sizeof(ct_info->labels), 183033db4125SJoe Stringer &ct_info->labels)) 1831c2ac6673SJoe Stringer return -EMSGSIZE; 1832cae3a262SJoe Stringer if (ct_info->helper) { 1833cae3a262SJoe Stringer if (nla_put_string(skb, OVS_CT_ATTR_HELPER, 1834cae3a262SJoe Stringer ct_info->helper->name)) 1835cae3a262SJoe Stringer return -EMSGSIZE; 1836cae3a262SJoe Stringer } 183712064551SJarno Rajahalme if (ct_info->have_eventmask && 183812064551SJarno Rajahalme nla_put_u32(skb, OVS_CT_ATTR_EVENTMASK, ct_info->eventmask)) 183912064551SJarno Rajahalme return -EMSGSIZE; 184006bd2bdfSYi-Hung Wei if (ct_info->timeout[0]) { 184106bd2bdfSYi-Hung Wei if (nla_put_string(skb, OVS_CT_ATTR_TIMEOUT, ct_info->timeout)) 184206bd2bdfSYi-Hung Wei return -EMSGSIZE; 184306bd2bdfSYi-Hung Wei } 184412064551SJarno Rajahalme 18454806e975SFlorian Westphal #if IS_ENABLED(CONFIG_NF_NAT) 184605752523SJarno Rajahalme if (ct_info->nat && !ovs_ct_nat_to_attr(ct_info, skb)) 184705752523SJarno Rajahalme return -EMSGSIZE; 184805752523SJarno Rajahalme #endif 18497f8a436eSJoe Stringer nla_nest_end(skb, start); 18507f8a436eSJoe Stringer 18517f8a436eSJoe Stringer return 0; 18527f8a436eSJoe Stringer } 18537f8a436eSJoe Stringer 18547f8a436eSJoe Stringer void ovs_ct_free_action(const struct nlattr *a) 18557f8a436eSJoe Stringer { 18567f8a436eSJoe Stringer struct ovs_conntrack_info *ct_info = nla_data(a); 18577f8a436eSJoe Stringer 18582f3ab9f9SJoe Stringer __ovs_ct_free_action(ct_info); 18592f3ab9f9SJoe Stringer } 18602f3ab9f9SJoe Stringer 18612f3ab9f9SJoe Stringer static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info) 18622f3ab9f9SJoe Stringer { 1863fec9c271SFlavio Leitner if (ct_info->helper) { 1864f319ca65SGeert Uytterhoeven #if IS_ENABLED(CONFIG_NF_NAT) 1865fec9c271SFlavio Leitner if (ct_info->nat) 1866fec9c271SFlavio Leitner nf_nat_helper_put(ct_info->helper); 1867fec9c271SFlavio Leitner #endif 1868d91fc59cSLiping Zhang nf_conntrack_helper_put(ct_info->helper); 1869fec9c271SFlavio Leitner } 187006bd2bdfSYi-Hung Wei if (ct_info->ct) { 187106bd2bdfSYi-Hung Wei if (ct_info->timeout[0]) 187206bd2bdfSYi-Hung Wei nf_ct_destroy_timeout(ct_info->ct); 18736d670497SDan Carpenter nf_ct_tmpl_free(ct_info->ct); 187406bd2bdfSYi-Hung Wei } 18757f8a436eSJoe Stringer } 1876c2ac6673SJoe Stringer 187711efd5cbSYi-Hung Wei #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) 187811efd5cbSYi-Hung Wei static int ovs_ct_limit_init(struct net *net, struct ovs_net *ovs_net) 187911efd5cbSYi-Hung Wei { 188011efd5cbSYi-Hung Wei int i, err; 188111efd5cbSYi-Hung Wei 188211efd5cbSYi-Hung Wei ovs_net->ct_limit_info = kmalloc(sizeof(*ovs_net->ct_limit_info), 188311efd5cbSYi-Hung Wei GFP_KERNEL); 188411efd5cbSYi-Hung Wei if (!ovs_net->ct_limit_info) 188511efd5cbSYi-Hung Wei return -ENOMEM; 188611efd5cbSYi-Hung Wei 188711efd5cbSYi-Hung Wei ovs_net->ct_limit_info->default_limit = OVS_CT_LIMIT_DEFAULT; 188811efd5cbSYi-Hung Wei ovs_net->ct_limit_info->limits = 188911efd5cbSYi-Hung Wei kmalloc_array(CT_LIMIT_HASH_BUCKETS, sizeof(struct hlist_head), 189011efd5cbSYi-Hung Wei GFP_KERNEL); 189111efd5cbSYi-Hung Wei if (!ovs_net->ct_limit_info->limits) { 189211efd5cbSYi-Hung Wei kfree(ovs_net->ct_limit_info); 189311efd5cbSYi-Hung Wei return -ENOMEM; 189411efd5cbSYi-Hung Wei } 189511efd5cbSYi-Hung Wei 189611efd5cbSYi-Hung Wei for (i = 0; i < CT_LIMIT_HASH_BUCKETS; i++) 189711efd5cbSYi-Hung Wei INIT_HLIST_HEAD(&ovs_net->ct_limit_info->limits[i]); 189811efd5cbSYi-Hung Wei 189911efd5cbSYi-Hung Wei ovs_net->ct_limit_info->data = 190011efd5cbSYi-Hung Wei nf_conncount_init(net, NFPROTO_INET, sizeof(u32)); 190111efd5cbSYi-Hung Wei 190211efd5cbSYi-Hung Wei if (IS_ERR(ovs_net->ct_limit_info->data)) { 190311efd5cbSYi-Hung Wei err = PTR_ERR(ovs_net->ct_limit_info->data); 190411efd5cbSYi-Hung Wei kfree(ovs_net->ct_limit_info->limits); 190511efd5cbSYi-Hung Wei kfree(ovs_net->ct_limit_info); 190611efd5cbSYi-Hung Wei pr_err("openvswitch: failed to init nf_conncount %d\n", err); 190711efd5cbSYi-Hung Wei return err; 190811efd5cbSYi-Hung Wei } 190911efd5cbSYi-Hung Wei return 0; 191011efd5cbSYi-Hung Wei } 191111efd5cbSYi-Hung Wei 191211efd5cbSYi-Hung Wei static void ovs_ct_limit_exit(struct net *net, struct ovs_net *ovs_net) 191311efd5cbSYi-Hung Wei { 191411efd5cbSYi-Hung Wei const struct ovs_ct_limit_info *info = ovs_net->ct_limit_info; 191511efd5cbSYi-Hung Wei int i; 191611efd5cbSYi-Hung Wei 191711efd5cbSYi-Hung Wei nf_conncount_destroy(net, NFPROTO_INET, info->data); 191811efd5cbSYi-Hung Wei for (i = 0; i < CT_LIMIT_HASH_BUCKETS; ++i) { 191911efd5cbSYi-Hung Wei struct hlist_head *head = &info->limits[i]; 192011efd5cbSYi-Hung Wei struct ovs_ct_limit *ct_limit; 192111efd5cbSYi-Hung Wei 192227de77ceSTonghao Zhang hlist_for_each_entry_rcu(ct_limit, head, hlist_node, 192327de77ceSTonghao Zhang lockdep_ovsl_is_held()) 192411efd5cbSYi-Hung Wei kfree_rcu(ct_limit, rcu); 192511efd5cbSYi-Hung Wei } 19267b066d17SZeng Tao kfree(info->limits); 19277b066d17SZeng Tao kfree(info); 192811efd5cbSYi-Hung Wei } 192911efd5cbSYi-Hung Wei 193011efd5cbSYi-Hung Wei static struct sk_buff * 193111efd5cbSYi-Hung Wei ovs_ct_limit_cmd_reply_start(struct genl_info *info, u8 cmd, 193211efd5cbSYi-Hung Wei struct ovs_header **ovs_reply_header) 193311efd5cbSYi-Hung Wei { 193411efd5cbSYi-Hung Wei struct ovs_header *ovs_header = info->userhdr; 193511efd5cbSYi-Hung Wei struct sk_buff *skb; 193611efd5cbSYi-Hung Wei 193711efd5cbSYi-Hung Wei skb = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 193811efd5cbSYi-Hung Wei if (!skb) 193911efd5cbSYi-Hung Wei return ERR_PTR(-ENOMEM); 194011efd5cbSYi-Hung Wei 194111efd5cbSYi-Hung Wei *ovs_reply_header = genlmsg_put(skb, info->snd_portid, 194211efd5cbSYi-Hung Wei info->snd_seq, 194311efd5cbSYi-Hung Wei &dp_ct_limit_genl_family, 0, cmd); 194411efd5cbSYi-Hung Wei 194511efd5cbSYi-Hung Wei if (!*ovs_reply_header) { 194611efd5cbSYi-Hung Wei nlmsg_free(skb); 194711efd5cbSYi-Hung Wei return ERR_PTR(-EMSGSIZE); 194811efd5cbSYi-Hung Wei } 194911efd5cbSYi-Hung Wei (*ovs_reply_header)->dp_ifindex = ovs_header->dp_ifindex; 195011efd5cbSYi-Hung Wei 195111efd5cbSYi-Hung Wei return skb; 195211efd5cbSYi-Hung Wei } 195311efd5cbSYi-Hung Wei 195411efd5cbSYi-Hung Wei static bool check_zone_id(int zone_id, u16 *pzone) 195511efd5cbSYi-Hung Wei { 195611efd5cbSYi-Hung Wei if (zone_id >= 0 && zone_id <= 65535) { 195711efd5cbSYi-Hung Wei *pzone = (u16)zone_id; 195811efd5cbSYi-Hung Wei return true; 195911efd5cbSYi-Hung Wei } 196011efd5cbSYi-Hung Wei return false; 196111efd5cbSYi-Hung Wei } 196211efd5cbSYi-Hung Wei 196311efd5cbSYi-Hung Wei static int ovs_ct_limit_set_zone_limit(struct nlattr *nla_zone_limit, 196411efd5cbSYi-Hung Wei struct ovs_ct_limit_info *info) 196511efd5cbSYi-Hung Wei { 196611efd5cbSYi-Hung Wei struct ovs_zone_limit *zone_limit; 196711efd5cbSYi-Hung Wei int rem; 196811efd5cbSYi-Hung Wei u16 zone; 196911efd5cbSYi-Hung Wei 197011efd5cbSYi-Hung Wei rem = NLA_ALIGN(nla_len(nla_zone_limit)); 197111efd5cbSYi-Hung Wei zone_limit = (struct ovs_zone_limit *)nla_data(nla_zone_limit); 197211efd5cbSYi-Hung Wei 197311efd5cbSYi-Hung Wei while (rem >= sizeof(*zone_limit)) { 197411efd5cbSYi-Hung Wei if (unlikely(zone_limit->zone_id == 197511efd5cbSYi-Hung Wei OVS_ZONE_LIMIT_DEFAULT_ZONE)) { 197611efd5cbSYi-Hung Wei ovs_lock(); 197711efd5cbSYi-Hung Wei info->default_limit = zone_limit->limit; 197811efd5cbSYi-Hung Wei ovs_unlock(); 197911efd5cbSYi-Hung Wei } else if (unlikely(!check_zone_id( 198011efd5cbSYi-Hung Wei zone_limit->zone_id, &zone))) { 198111efd5cbSYi-Hung Wei OVS_NLERR(true, "zone id is out of range"); 198211efd5cbSYi-Hung Wei } else { 198311efd5cbSYi-Hung Wei struct ovs_ct_limit *ct_limit; 198411efd5cbSYi-Hung Wei 198511efd5cbSYi-Hung Wei ct_limit = kmalloc(sizeof(*ct_limit), GFP_KERNEL); 198611efd5cbSYi-Hung Wei if (!ct_limit) 198711efd5cbSYi-Hung Wei return -ENOMEM; 198811efd5cbSYi-Hung Wei 198911efd5cbSYi-Hung Wei ct_limit->zone = zone; 199011efd5cbSYi-Hung Wei ct_limit->limit = zone_limit->limit; 199111efd5cbSYi-Hung Wei 199211efd5cbSYi-Hung Wei ovs_lock(); 199311efd5cbSYi-Hung Wei ct_limit_set(info, ct_limit); 199411efd5cbSYi-Hung Wei ovs_unlock(); 199511efd5cbSYi-Hung Wei } 199611efd5cbSYi-Hung Wei rem -= NLA_ALIGN(sizeof(*zone_limit)); 199711efd5cbSYi-Hung Wei zone_limit = (struct ovs_zone_limit *)((u8 *)zone_limit + 199811efd5cbSYi-Hung Wei NLA_ALIGN(sizeof(*zone_limit))); 199911efd5cbSYi-Hung Wei } 200011efd5cbSYi-Hung Wei 200111efd5cbSYi-Hung Wei if (rem) 200211efd5cbSYi-Hung Wei OVS_NLERR(true, "set zone limit has %d unknown bytes", rem); 200311efd5cbSYi-Hung Wei 200411efd5cbSYi-Hung Wei return 0; 200511efd5cbSYi-Hung Wei } 200611efd5cbSYi-Hung Wei 200711efd5cbSYi-Hung Wei static int ovs_ct_limit_del_zone_limit(struct nlattr *nla_zone_limit, 200811efd5cbSYi-Hung Wei struct ovs_ct_limit_info *info) 200911efd5cbSYi-Hung Wei { 201011efd5cbSYi-Hung Wei struct ovs_zone_limit *zone_limit; 201111efd5cbSYi-Hung Wei int rem; 201211efd5cbSYi-Hung Wei u16 zone; 201311efd5cbSYi-Hung Wei 201411efd5cbSYi-Hung Wei rem = NLA_ALIGN(nla_len(nla_zone_limit)); 201511efd5cbSYi-Hung Wei zone_limit = (struct ovs_zone_limit *)nla_data(nla_zone_limit); 201611efd5cbSYi-Hung Wei 201711efd5cbSYi-Hung Wei while (rem >= sizeof(*zone_limit)) { 201811efd5cbSYi-Hung Wei if (unlikely(zone_limit->zone_id == 201911efd5cbSYi-Hung Wei OVS_ZONE_LIMIT_DEFAULT_ZONE)) { 202011efd5cbSYi-Hung Wei ovs_lock(); 202111efd5cbSYi-Hung Wei info->default_limit = OVS_CT_LIMIT_DEFAULT; 202211efd5cbSYi-Hung Wei ovs_unlock(); 202311efd5cbSYi-Hung Wei } else if (unlikely(!check_zone_id( 202411efd5cbSYi-Hung Wei zone_limit->zone_id, &zone))) { 202511efd5cbSYi-Hung Wei OVS_NLERR(true, "zone id is out of range"); 202611efd5cbSYi-Hung Wei } else { 202711efd5cbSYi-Hung Wei ovs_lock(); 202811efd5cbSYi-Hung Wei ct_limit_del(info, zone); 202911efd5cbSYi-Hung Wei ovs_unlock(); 203011efd5cbSYi-Hung Wei } 203111efd5cbSYi-Hung Wei rem -= NLA_ALIGN(sizeof(*zone_limit)); 203211efd5cbSYi-Hung Wei zone_limit = (struct ovs_zone_limit *)((u8 *)zone_limit + 203311efd5cbSYi-Hung Wei NLA_ALIGN(sizeof(*zone_limit))); 203411efd5cbSYi-Hung Wei } 203511efd5cbSYi-Hung Wei 203611efd5cbSYi-Hung Wei if (rem) 203711efd5cbSYi-Hung Wei OVS_NLERR(true, "del zone limit has %d unknown bytes", rem); 203811efd5cbSYi-Hung Wei 203911efd5cbSYi-Hung Wei return 0; 204011efd5cbSYi-Hung Wei } 204111efd5cbSYi-Hung Wei 204211efd5cbSYi-Hung Wei static int ovs_ct_limit_get_default_limit(struct ovs_ct_limit_info *info, 204311efd5cbSYi-Hung Wei struct sk_buff *reply) 204411efd5cbSYi-Hung Wei { 20454d51419dSIlya Maximets struct ovs_zone_limit zone_limit = { 20464d51419dSIlya Maximets .zone_id = OVS_ZONE_LIMIT_DEFAULT_ZONE, 20474d51419dSIlya Maximets .limit = info->default_limit, 20484d51419dSIlya Maximets }; 204911efd5cbSYi-Hung Wei 20505e359044SZheng Yongjun return nla_put_nohdr(reply, sizeof(zone_limit), &zone_limit); 205111efd5cbSYi-Hung Wei } 205211efd5cbSYi-Hung Wei 205311efd5cbSYi-Hung Wei static int __ovs_ct_limit_get_zone_limit(struct net *net, 205411efd5cbSYi-Hung Wei struct nf_conncount_data *data, 205511efd5cbSYi-Hung Wei u16 zone_id, u32 limit, 205611efd5cbSYi-Hung Wei struct sk_buff *reply) 205711efd5cbSYi-Hung Wei { 205811efd5cbSYi-Hung Wei struct nf_conntrack_zone ct_zone; 205911efd5cbSYi-Hung Wei struct ovs_zone_limit zone_limit; 206011efd5cbSYi-Hung Wei u32 conncount_key = zone_id; 206111efd5cbSYi-Hung Wei 206211efd5cbSYi-Hung Wei zone_limit.zone_id = zone_id; 206311efd5cbSYi-Hung Wei zone_limit.limit = limit; 206411efd5cbSYi-Hung Wei nf_ct_zone_init(&ct_zone, zone_id, NF_CT_DEFAULT_ZONE_DIR, 0); 206511efd5cbSYi-Hung Wei 206611efd5cbSYi-Hung Wei zone_limit.count = nf_conncount_count(net, data, &conncount_key, NULL, 206711efd5cbSYi-Hung Wei &ct_zone); 206811efd5cbSYi-Hung Wei return nla_put_nohdr(reply, sizeof(zone_limit), &zone_limit); 206911efd5cbSYi-Hung Wei } 207011efd5cbSYi-Hung Wei 207111efd5cbSYi-Hung Wei static int ovs_ct_limit_get_zone_limit(struct net *net, 207211efd5cbSYi-Hung Wei struct nlattr *nla_zone_limit, 207311efd5cbSYi-Hung Wei struct ovs_ct_limit_info *info, 207411efd5cbSYi-Hung Wei struct sk_buff *reply) 207511efd5cbSYi-Hung Wei { 207611efd5cbSYi-Hung Wei struct ovs_zone_limit *zone_limit; 207711efd5cbSYi-Hung Wei int rem, err; 207811efd5cbSYi-Hung Wei u32 limit; 207911efd5cbSYi-Hung Wei u16 zone; 208011efd5cbSYi-Hung Wei 208111efd5cbSYi-Hung Wei rem = NLA_ALIGN(nla_len(nla_zone_limit)); 208211efd5cbSYi-Hung Wei zone_limit = (struct ovs_zone_limit *)nla_data(nla_zone_limit); 208311efd5cbSYi-Hung Wei 208411efd5cbSYi-Hung Wei while (rem >= sizeof(*zone_limit)) { 208511efd5cbSYi-Hung Wei if (unlikely(zone_limit->zone_id == 208611efd5cbSYi-Hung Wei OVS_ZONE_LIMIT_DEFAULT_ZONE)) { 208711efd5cbSYi-Hung Wei err = ovs_ct_limit_get_default_limit(info, reply); 208811efd5cbSYi-Hung Wei if (err) 208911efd5cbSYi-Hung Wei return err; 209011efd5cbSYi-Hung Wei } else if (unlikely(!check_zone_id(zone_limit->zone_id, 209111efd5cbSYi-Hung Wei &zone))) { 209211efd5cbSYi-Hung Wei OVS_NLERR(true, "zone id is out of range"); 209311efd5cbSYi-Hung Wei } else { 209411efd5cbSYi-Hung Wei rcu_read_lock(); 209511efd5cbSYi-Hung Wei limit = ct_limit_get(info, zone); 209611efd5cbSYi-Hung Wei rcu_read_unlock(); 209711efd5cbSYi-Hung Wei 209811efd5cbSYi-Hung Wei err = __ovs_ct_limit_get_zone_limit( 209911efd5cbSYi-Hung Wei net, info->data, zone, limit, reply); 210011efd5cbSYi-Hung Wei if (err) 210111efd5cbSYi-Hung Wei return err; 210211efd5cbSYi-Hung Wei } 210311efd5cbSYi-Hung Wei rem -= NLA_ALIGN(sizeof(*zone_limit)); 210411efd5cbSYi-Hung Wei zone_limit = (struct ovs_zone_limit *)((u8 *)zone_limit + 210511efd5cbSYi-Hung Wei NLA_ALIGN(sizeof(*zone_limit))); 210611efd5cbSYi-Hung Wei } 210711efd5cbSYi-Hung Wei 210811efd5cbSYi-Hung Wei if (rem) 210911efd5cbSYi-Hung Wei OVS_NLERR(true, "get zone limit has %d unknown bytes", rem); 211011efd5cbSYi-Hung Wei 211111efd5cbSYi-Hung Wei return 0; 211211efd5cbSYi-Hung Wei } 211311efd5cbSYi-Hung Wei 211411efd5cbSYi-Hung Wei static int ovs_ct_limit_get_all_zone_limit(struct net *net, 211511efd5cbSYi-Hung Wei struct ovs_ct_limit_info *info, 211611efd5cbSYi-Hung Wei struct sk_buff *reply) 211711efd5cbSYi-Hung Wei { 211811efd5cbSYi-Hung Wei struct ovs_ct_limit *ct_limit; 211911efd5cbSYi-Hung Wei struct hlist_head *head; 212011efd5cbSYi-Hung Wei int i, err = 0; 212111efd5cbSYi-Hung Wei 212211efd5cbSYi-Hung Wei err = ovs_ct_limit_get_default_limit(info, reply); 212311efd5cbSYi-Hung Wei if (err) 212411efd5cbSYi-Hung Wei return err; 212511efd5cbSYi-Hung Wei 212611efd5cbSYi-Hung Wei rcu_read_lock(); 212711efd5cbSYi-Hung Wei for (i = 0; i < CT_LIMIT_HASH_BUCKETS; ++i) { 212811efd5cbSYi-Hung Wei head = &info->limits[i]; 212911efd5cbSYi-Hung Wei hlist_for_each_entry_rcu(ct_limit, head, hlist_node) { 213011efd5cbSYi-Hung Wei err = __ovs_ct_limit_get_zone_limit(net, info->data, 213111efd5cbSYi-Hung Wei ct_limit->zone, ct_limit->limit, reply); 213211efd5cbSYi-Hung Wei if (err) 213311efd5cbSYi-Hung Wei goto exit_err; 213411efd5cbSYi-Hung Wei } 213511efd5cbSYi-Hung Wei } 213611efd5cbSYi-Hung Wei 213711efd5cbSYi-Hung Wei exit_err: 213811efd5cbSYi-Hung Wei rcu_read_unlock(); 213911efd5cbSYi-Hung Wei return err; 214011efd5cbSYi-Hung Wei } 214111efd5cbSYi-Hung Wei 214211efd5cbSYi-Hung Wei static int ovs_ct_limit_cmd_set(struct sk_buff *skb, struct genl_info *info) 214311efd5cbSYi-Hung Wei { 214411efd5cbSYi-Hung Wei struct nlattr **a = info->attrs; 214511efd5cbSYi-Hung Wei struct sk_buff *reply; 214611efd5cbSYi-Hung Wei struct ovs_header *ovs_reply_header; 214711efd5cbSYi-Hung Wei struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id); 214811efd5cbSYi-Hung Wei struct ovs_ct_limit_info *ct_limit_info = ovs_net->ct_limit_info; 214911efd5cbSYi-Hung Wei int err; 215011efd5cbSYi-Hung Wei 215111efd5cbSYi-Hung Wei reply = ovs_ct_limit_cmd_reply_start(info, OVS_CT_LIMIT_CMD_SET, 215211efd5cbSYi-Hung Wei &ovs_reply_header); 215311efd5cbSYi-Hung Wei if (IS_ERR(reply)) 215411efd5cbSYi-Hung Wei return PTR_ERR(reply); 215511efd5cbSYi-Hung Wei 215611efd5cbSYi-Hung Wei if (!a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]) { 215711efd5cbSYi-Hung Wei err = -EINVAL; 215811efd5cbSYi-Hung Wei goto exit_err; 215911efd5cbSYi-Hung Wei } 216011efd5cbSYi-Hung Wei 216111efd5cbSYi-Hung Wei err = ovs_ct_limit_set_zone_limit(a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT], 216211efd5cbSYi-Hung Wei ct_limit_info); 216311efd5cbSYi-Hung Wei if (err) 216411efd5cbSYi-Hung Wei goto exit_err; 216511efd5cbSYi-Hung Wei 216611efd5cbSYi-Hung Wei static_branch_enable(&ovs_ct_limit_enabled); 216711efd5cbSYi-Hung Wei 216811efd5cbSYi-Hung Wei genlmsg_end(reply, ovs_reply_header); 216911efd5cbSYi-Hung Wei return genlmsg_reply(reply, info); 217011efd5cbSYi-Hung Wei 217111efd5cbSYi-Hung Wei exit_err: 217211efd5cbSYi-Hung Wei nlmsg_free(reply); 217311efd5cbSYi-Hung Wei return err; 217411efd5cbSYi-Hung Wei } 217511efd5cbSYi-Hung Wei 217611efd5cbSYi-Hung Wei static int ovs_ct_limit_cmd_del(struct sk_buff *skb, struct genl_info *info) 217711efd5cbSYi-Hung Wei { 217811efd5cbSYi-Hung Wei struct nlattr **a = info->attrs; 217911efd5cbSYi-Hung Wei struct sk_buff *reply; 218011efd5cbSYi-Hung Wei struct ovs_header *ovs_reply_header; 218111efd5cbSYi-Hung Wei struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id); 218211efd5cbSYi-Hung Wei struct ovs_ct_limit_info *ct_limit_info = ovs_net->ct_limit_info; 218311efd5cbSYi-Hung Wei int err; 218411efd5cbSYi-Hung Wei 218511efd5cbSYi-Hung Wei reply = ovs_ct_limit_cmd_reply_start(info, OVS_CT_LIMIT_CMD_DEL, 218611efd5cbSYi-Hung Wei &ovs_reply_header); 218711efd5cbSYi-Hung Wei if (IS_ERR(reply)) 218811efd5cbSYi-Hung Wei return PTR_ERR(reply); 218911efd5cbSYi-Hung Wei 219011efd5cbSYi-Hung Wei if (!a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]) { 219111efd5cbSYi-Hung Wei err = -EINVAL; 219211efd5cbSYi-Hung Wei goto exit_err; 219311efd5cbSYi-Hung Wei } 219411efd5cbSYi-Hung Wei 219511efd5cbSYi-Hung Wei err = ovs_ct_limit_del_zone_limit(a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT], 219611efd5cbSYi-Hung Wei ct_limit_info); 219711efd5cbSYi-Hung Wei if (err) 219811efd5cbSYi-Hung Wei goto exit_err; 219911efd5cbSYi-Hung Wei 220011efd5cbSYi-Hung Wei genlmsg_end(reply, ovs_reply_header); 220111efd5cbSYi-Hung Wei return genlmsg_reply(reply, info); 220211efd5cbSYi-Hung Wei 220311efd5cbSYi-Hung Wei exit_err: 220411efd5cbSYi-Hung Wei nlmsg_free(reply); 220511efd5cbSYi-Hung Wei return err; 220611efd5cbSYi-Hung Wei } 220711efd5cbSYi-Hung Wei 220811efd5cbSYi-Hung Wei static int ovs_ct_limit_cmd_get(struct sk_buff *skb, struct genl_info *info) 220911efd5cbSYi-Hung Wei { 221011efd5cbSYi-Hung Wei struct nlattr **a = info->attrs; 221111efd5cbSYi-Hung Wei struct nlattr *nla_reply; 221211efd5cbSYi-Hung Wei struct sk_buff *reply; 221311efd5cbSYi-Hung Wei struct ovs_header *ovs_reply_header; 221411efd5cbSYi-Hung Wei struct net *net = sock_net(skb->sk); 221511efd5cbSYi-Hung Wei struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 221611efd5cbSYi-Hung Wei struct ovs_ct_limit_info *ct_limit_info = ovs_net->ct_limit_info; 221711efd5cbSYi-Hung Wei int err; 221811efd5cbSYi-Hung Wei 221911efd5cbSYi-Hung Wei reply = ovs_ct_limit_cmd_reply_start(info, OVS_CT_LIMIT_CMD_GET, 222011efd5cbSYi-Hung Wei &ovs_reply_header); 222111efd5cbSYi-Hung Wei if (IS_ERR(reply)) 222211efd5cbSYi-Hung Wei return PTR_ERR(reply); 222311efd5cbSYi-Hung Wei 2224ae0be8deSMichal Kubecek nla_reply = nla_nest_start_noflag(reply, OVS_CT_LIMIT_ATTR_ZONE_LIMIT); 2225ca965346SColin Ian King if (!nla_reply) { 2226ca965346SColin Ian King err = -EMSGSIZE; 2227ca965346SColin Ian King goto exit_err; 2228ca965346SColin Ian King } 222911efd5cbSYi-Hung Wei 223011efd5cbSYi-Hung Wei if (a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]) { 223111efd5cbSYi-Hung Wei err = ovs_ct_limit_get_zone_limit( 223211efd5cbSYi-Hung Wei net, a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT], ct_limit_info, 223311efd5cbSYi-Hung Wei reply); 223411efd5cbSYi-Hung Wei if (err) 223511efd5cbSYi-Hung Wei goto exit_err; 223611efd5cbSYi-Hung Wei } else { 223711efd5cbSYi-Hung Wei err = ovs_ct_limit_get_all_zone_limit(net, ct_limit_info, 223811efd5cbSYi-Hung Wei reply); 223911efd5cbSYi-Hung Wei if (err) 224011efd5cbSYi-Hung Wei goto exit_err; 224111efd5cbSYi-Hung Wei } 224211efd5cbSYi-Hung Wei 224311efd5cbSYi-Hung Wei nla_nest_end(reply, nla_reply); 224411efd5cbSYi-Hung Wei genlmsg_end(reply, ovs_reply_header); 224511efd5cbSYi-Hung Wei return genlmsg_reply(reply, info); 224611efd5cbSYi-Hung Wei 224711efd5cbSYi-Hung Wei exit_err: 224811efd5cbSYi-Hung Wei nlmsg_free(reply); 224911efd5cbSYi-Hung Wei return err; 225011efd5cbSYi-Hung Wei } 225111efd5cbSYi-Hung Wei 2252b980b313SRikard Falkeborn static const struct genl_small_ops ct_limit_genl_ops[] = { 225311efd5cbSYi-Hung Wei { .cmd = OVS_CT_LIMIT_CMD_SET, 2254ef6243acSJohannes Berg .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 225511efd5cbSYi-Hung Wei .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN 225611efd5cbSYi-Hung Wei * privilege. */ 225711efd5cbSYi-Hung Wei .doit = ovs_ct_limit_cmd_set, 225811efd5cbSYi-Hung Wei }, 225911efd5cbSYi-Hung Wei { .cmd = OVS_CT_LIMIT_CMD_DEL, 2260ef6243acSJohannes Berg .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 226111efd5cbSYi-Hung Wei .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN 226211efd5cbSYi-Hung Wei * privilege. */ 226311efd5cbSYi-Hung Wei .doit = ovs_ct_limit_cmd_del, 226411efd5cbSYi-Hung Wei }, 226511efd5cbSYi-Hung Wei { .cmd = OVS_CT_LIMIT_CMD_GET, 2266ef6243acSJohannes Berg .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 226711efd5cbSYi-Hung Wei .flags = 0, /* OK for unprivileged users. */ 226811efd5cbSYi-Hung Wei .doit = ovs_ct_limit_cmd_get, 226911efd5cbSYi-Hung Wei }, 227011efd5cbSYi-Hung Wei }; 227111efd5cbSYi-Hung Wei 227211efd5cbSYi-Hung Wei static const struct genl_multicast_group ovs_ct_limit_multicast_group = { 227311efd5cbSYi-Hung Wei .name = OVS_CT_LIMIT_MCGROUP, 227411efd5cbSYi-Hung Wei }; 227511efd5cbSYi-Hung Wei 227611efd5cbSYi-Hung Wei struct genl_family dp_ct_limit_genl_family __ro_after_init = { 227711efd5cbSYi-Hung Wei .hdrsize = sizeof(struct ovs_header), 227811efd5cbSYi-Hung Wei .name = OVS_CT_LIMIT_FAMILY, 227911efd5cbSYi-Hung Wei .version = OVS_CT_LIMIT_VERSION, 228011efd5cbSYi-Hung Wei .maxattr = OVS_CT_LIMIT_ATTR_MAX, 22813b0f31f2SJohannes Berg .policy = ct_limit_policy, 228211efd5cbSYi-Hung Wei .netnsok = true, 228311efd5cbSYi-Hung Wei .parallel_ops = true, 228466a9b928SJakub Kicinski .small_ops = ct_limit_genl_ops, 228566a9b928SJakub Kicinski .n_small_ops = ARRAY_SIZE(ct_limit_genl_ops), 2286*9c5d03d3SJakub Kicinski .resv_start_op = OVS_CT_LIMIT_CMD_GET + 1, 228711efd5cbSYi-Hung Wei .mcgrps = &ovs_ct_limit_multicast_group, 228811efd5cbSYi-Hung Wei .n_mcgrps = 1, 228911efd5cbSYi-Hung Wei .module = THIS_MODULE, 229011efd5cbSYi-Hung Wei }; 229111efd5cbSYi-Hung Wei #endif 229211efd5cbSYi-Hung Wei 229311efd5cbSYi-Hung Wei int ovs_ct_init(struct net *net) 2294c2ac6673SJoe Stringer { 229533db4125SJoe Stringer unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE; 2296c2ac6673SJoe Stringer struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 2297c2ac6673SJoe Stringer 2298adff6c65SFlorian Westphal if (nf_connlabels_get(net, n_bits - 1)) { 2299c2ac6673SJoe Stringer ovs_net->xt_label = false; 2300c2ac6673SJoe Stringer OVS_NLERR(true, "Failed to set connlabel length"); 2301c2ac6673SJoe Stringer } else { 2302c2ac6673SJoe Stringer ovs_net->xt_label = true; 2303c2ac6673SJoe Stringer } 230411efd5cbSYi-Hung Wei 230511efd5cbSYi-Hung Wei #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) 230611efd5cbSYi-Hung Wei return ovs_ct_limit_init(net, ovs_net); 230711efd5cbSYi-Hung Wei #else 230811efd5cbSYi-Hung Wei return 0; 230911efd5cbSYi-Hung Wei #endif 2310c2ac6673SJoe Stringer } 2311c2ac6673SJoe Stringer 2312c2ac6673SJoe Stringer void ovs_ct_exit(struct net *net) 2313c2ac6673SJoe Stringer { 2314c2ac6673SJoe Stringer struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 2315c2ac6673SJoe Stringer 231611efd5cbSYi-Hung Wei #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) 231711efd5cbSYi-Hung Wei ovs_ct_limit_exit(net, ovs_net); 231811efd5cbSYi-Hung Wei #endif 231911efd5cbSYi-Hung Wei 2320c2ac6673SJoe Stringer if (ovs_net->xt_label) 2321c2ac6673SJoe Stringer nf_connlabels_put(net); 2322c2ac6673SJoe Stringer } 2323