17f8a436eSJoe Stringer /* 27f8a436eSJoe Stringer * Copyright (c) 2015 Nicira, Inc. 37f8a436eSJoe Stringer * 47f8a436eSJoe Stringer * This program is free software; you can redistribute it and/or 57f8a436eSJoe Stringer * modify it under the terms of version 2 of the GNU General Public 67f8a436eSJoe Stringer * License as published by the Free Software Foundation. 77f8a436eSJoe Stringer * 87f8a436eSJoe Stringer * This program is distributed in the hope that it will be useful, but 97f8a436eSJoe Stringer * WITHOUT ANY WARRANTY; without even the implied warranty of 107f8a436eSJoe Stringer * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 117f8a436eSJoe Stringer * General Public License for more details. 127f8a436eSJoe Stringer */ 137f8a436eSJoe Stringer 147f8a436eSJoe Stringer #include <linux/module.h> 157f8a436eSJoe Stringer #include <linux/openvswitch.h> 1605752523SJarno Rajahalme #include <linux/tcp.h> 1705752523SJarno Rajahalme #include <linux/udp.h> 1805752523SJarno Rajahalme #include <linux/sctp.h> 197f8a436eSJoe Stringer #include <net/ip.h> 207f8a436eSJoe Stringer #include <net/netfilter/nf_conntrack_core.h> 21cae3a262SJoe Stringer #include <net/netfilter/nf_conntrack_helper.h> 22c2ac6673SJoe Stringer #include <net/netfilter/nf_conntrack_labels.h> 2305752523SJarno Rajahalme #include <net/netfilter/nf_conntrack_seqadj.h> 247f8a436eSJoe Stringer #include <net/netfilter/nf_conntrack_zones.h> 257f8a436eSJoe Stringer #include <net/netfilter/ipv6/nf_defrag_ipv6.h> 267f8a436eSJoe Stringer 2705752523SJarno Rajahalme #ifdef CONFIG_NF_NAT_NEEDED 2805752523SJarno Rajahalme #include <linux/netfilter/nf_nat.h> 2905752523SJarno Rajahalme #include <net/netfilter/nf_nat_core.h> 3005752523SJarno Rajahalme #include <net/netfilter/nf_nat_l3proto.h> 3105752523SJarno Rajahalme #endif 3205752523SJarno Rajahalme 337f8a436eSJoe Stringer #include "datapath.h" 347f8a436eSJoe Stringer #include "conntrack.h" 357f8a436eSJoe Stringer #include "flow.h" 367f8a436eSJoe Stringer 
#include "flow_netlink.h"

/* Pair of length bounds.  Users of this type are not visible in this part of
 * the file.
 */
struct ovs_ct_len_tbl {
	int maxlen;
	int minlen;
};

/* Metadata mark for a masked write to the conntrack mark. */
struct md_mark {
	u32 value;
	u32 mask;
};

/* Metadata labels for a masked write to the conntrack labels. */
struct md_labels {
	struct ovs_key_ct_labels value;
	struct ovs_key_ct_labels mask;
};

/* Bit flags stored in ovs_conntrack_info.nat. */
enum ovs_ct_nat {
	OVS_CT_NAT = 1 << 0,     /* NAT for committed connections only. */
	OVS_CT_SRC_NAT = 1 << 1, /* Source NAT for NEW connections. */
	OVS_CT_DST_NAT = 1 << 2, /* Destination NAT for NEW connections. */
};

/* Conntrack action context for execution. */
struct ovs_conntrack_info {
	struct nf_conntrack_helper *helper;
	struct nf_conntrack_zone zone;
	struct nf_conn *ct;
	u8 commit : 1;
	u8 nat : 3;                 /* enum ovs_ct_nat */
	u8 force : 1;
	u8 have_eventmask : 1;
	u16 family;
	u32 eventmask;              /* Mask of 1 << IPCT_*. */
	struct md_mark mark;
	struct md_labels labels;
#ifdef CONFIG_NF_NAT_NEEDED
	struct nf_nat_range range;  /* Only present for SRC NAT and DST NAT. */
#endif
};

static bool labels_nonzero(const struct ovs_key_ct_labels *labels);

static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info);

/* Translate the flow key's Ethernet type into a netfilter protocol family.
 * Anything that is neither IPv4 nor IPv6 yields NFPROTO_UNSPEC.
 */
static u16 key_to_nfproto(const struct sw_flow_key *key)
{
	const __be16 eth_type = key->eth.type;

	if (eth_type == htons(ETH_P_IP))
		return NFPROTO_IPV4;
	if (eth_type == htons(ETH_P_IPV6))
		return NFPROTO_IPV6;
	return NFPROTO_UNSPEC;
}

/* Map the SKB connection state into the OVS_CS_F_* values used by the flow
 * definition.  OVS_CS_F_TRACKED is always set; the remaining bits depend on
 * 'ctinfo'.
 */
static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo)
{
	switch (ctinfo) {
	case IP_CT_ESTABLISHED:
		return OVS_CS_F_TRACKED | OVS_CS_F_ESTABLISHED;
	case IP_CT_ESTABLISHED_REPLY:
		return OVS_CS_F_TRACKED | OVS_CS_F_REPLY_DIR |
		       OVS_CS_F_ESTABLISHED;
	case IP_CT_RELATED:
		return OVS_CS_F_TRACKED | OVS_CS_F_RELATED;
	case IP_CT_RELATED_REPLY:
		return OVS_CS_F_TRACKED | OVS_CS_F_REPLY_DIR |
		       OVS_CS_F_RELATED;
	case IP_CT_NEW:
		return OVS_CS_F_TRACKED | OVS_CS_F_NEW;
	default:
		return OVS_CS_F_TRACKED;
	}
}

/* Return the mark of 'ct', or 0 when 'ct' is NULL or conntrack marks are not
 * compiled in.
 */
static u32 ovs_ct_get_mark(const struct nf_conn *ct)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
	if (ct)
		return ct->mark;
#endif
	return 0;
}

/* Guard against conntrack labels max size shrinking below 128 bits. */
#if NF_CT_LABELS_MAX_SIZE < 16
#error NF_CT_LABELS_MAX_SIZE must be at least 16 bytes
#endif

/* Copy the labels of 'ct' into 'labels'.  When 'ct' is NULL or carries no
 * labels extension, 'labels' is zero-filled instead.
 */
static void ovs_ct_get_labels(const struct nf_conn *ct,
			      struct ovs_key_ct_labels *labels)
{
	struct nf_conn_labels *conn_labels = NULL;

	if (ct)
		conn_labels = nf_ct_labels_find(ct);

	if (!conn_labels) {
		memset(labels, 0, OVS_CT_LABELS_LEN);
		return;
	}

	memcpy(labels, conn_labels->bits, OVS_CT_LABELS_LEN);
}

/* Fill the original-direction protocol and transport fields of 'key' from
 * 'orig'.  When the tuple's protocol equals 'icmp_proto', the ICMP type and
 * code are stored in place of the source and destination ports.
 */
static void __ovs_ct_update_key_orig_tp(struct sw_flow_key *key,
					const struct nf_conntrack_tuple *orig,
					u8 icmp_proto)
{
	const u8 protonum = orig->dst.protonum;
	const bool is_icmp = protonum == icmp_proto;

	key->ct_orig_proto = protonum;
	key->ct.orig_tp.src = is_icmp ? htons(orig->dst.u.icmp.type)
				      : orig->src.u.all;
	key->ct.orig_tp.dst = is_icmp ? htons(orig->dst.u.icmp.code)
				      : orig->dst.u.all;
}

/* Write the conntrack state, zone, mark and labels into 'key'.  If 'ct' is
 * non-NULL and its address family matches the packet's Ethernet type, the
 * original-direction tuple is written as well; otherwise 'ct_orig_proto' is
 * cleared.
 */
static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state,
				const struct nf_conntrack_zone *zone,
				const struct nf_conn *ct)
{
	const struct nf_conntrack_tuple *orig;
	const struct nf_conn *origin;

	key->ct_state = state;
	key->ct_zone = zone->id;
	key->ct.mark = ovs_ct_get_mark(ct);
	ovs_ct_get_labels(ct, &key->ct.labels);

	if (!ct)
		goto no_orig_tuple;

	/* Use the master if we have one. */
	origin = ct->master ? ct->master : ct;
	orig = &origin->tuplehash[IP_CT_DIR_ORIGINAL].tuple;

	/* IP version must match with the master connection. */
	if (key->eth.type == htons(ETH_P_IP)) {
		if (nf_ct_l3num(origin) != NFPROTO_IPV4)
			goto no_orig_tuple;

		key->ipv4.ct_orig.src = orig->src.u3.ip;
		key->ipv4.ct_orig.dst = orig->dst.u3.ip;
		__ovs_ct_update_key_orig_tp(key, orig, IPPROTO_ICMP);
		return;
	}

	if (key->eth.type == htons(ETH_P_IPV6) &&
	    !sw_flow_key_is_nd(key) &&
	    nf_ct_l3num(origin) == NFPROTO_IPV6) {
		key->ipv6.ct_orig.src = orig->src.u3.in6;
		key->ipv6.ct_orig.dst = orig->dst.u3.in6;
		__ovs_ct_update_key_orig_tp(key, orig, NEXTHDR_ICMP);
		return;
	}

no_orig_tuple:
	/* Clear 'ct_orig_proto' to mark the non-existence of conntrack
	 * original direction key fields.
	 */
	key->ct_orig_proto = 0;
}

/* Update 'key' based on skb->_nfct.  If 'post_ct' is true, then OVS has
 * previously sent the packet to conntrack via the ct action.  If
 * 'keep_nat_flags' is true, the existing NAT flags are retained, else they are
 * initialized from the connection status.
2107f8a436eSJoe Stringer */ 2117f8a436eSJoe Stringer static void ovs_ct_update_key(const struct sk_buff *skb, 212d110986cSJoe Stringer const struct ovs_conntrack_info *info, 21305752523SJarno Rajahalme struct sw_flow_key *key, bool post_ct, 21405752523SJarno Rajahalme bool keep_nat_flags) 2157f8a436eSJoe Stringer { 2167f8a436eSJoe Stringer const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt; 2177f8a436eSJoe Stringer enum ip_conntrack_info ctinfo; 2187f8a436eSJoe Stringer struct nf_conn *ct; 2197f8a436eSJoe Stringer u8 state = 0; 2207f8a436eSJoe Stringer 2217f8a436eSJoe Stringer ct = nf_ct_get(skb, &ctinfo); 2227f8a436eSJoe Stringer if (ct) { 2237f8a436eSJoe Stringer state = ovs_ct_get_state(ctinfo); 2249f13ded8SJarno Rajahalme /* All unconfirmed entries are NEW connections. */ 2254f0909eeSJoe Stringer if (!nf_ct_is_confirmed(ct)) 2264f0909eeSJoe Stringer state |= OVS_CS_F_NEW; 2279f13ded8SJarno Rajahalme /* OVS persists the related flag for the duration of the 2289f13ded8SJarno Rajahalme * connection. 
2299f13ded8SJarno Rajahalme */ 2307f8a436eSJoe Stringer if (ct->master) 2317f8a436eSJoe Stringer state |= OVS_CS_F_RELATED; 23205752523SJarno Rajahalme if (keep_nat_flags) { 233316d4d78SJarno Rajahalme state |= key->ct_state & OVS_CS_F_NAT_MASK; 23405752523SJarno Rajahalme } else { 23505752523SJarno Rajahalme if (ct->status & IPS_SRC_NAT) 23605752523SJarno Rajahalme state |= OVS_CS_F_SRC_NAT; 23705752523SJarno Rajahalme if (ct->status & IPS_DST_NAT) 23805752523SJarno Rajahalme state |= OVS_CS_F_DST_NAT; 23905752523SJarno Rajahalme } 2407f8a436eSJoe Stringer zone = nf_ct_zone(ct); 2417f8a436eSJoe Stringer } else if (post_ct) { 2427f8a436eSJoe Stringer state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID; 243d110986cSJoe Stringer if (info) 244d110986cSJoe Stringer zone = &info->zone; 2457f8a436eSJoe Stringer } 246182e3042SJoe Stringer __ovs_ct_update_key(key, state, zone, ct); 2477f8a436eSJoe Stringer } 2487f8a436eSJoe Stringer 2499f13ded8SJarno Rajahalme /* This is called to initialize CT key fields possibly coming in from the local 2509f13ded8SJarno Rajahalme * stack. 
2519f13ded8SJarno Rajahalme */ 2527f8a436eSJoe Stringer void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key) 2537f8a436eSJoe Stringer { 25405752523SJarno Rajahalme ovs_ct_update_key(skb, NULL, key, false, false); 2557f8a436eSJoe Stringer } 2567f8a436eSJoe Stringer 2579dd7f890SJarno Rajahalme #define IN6_ADDR_INITIALIZER(ADDR) \ 2589dd7f890SJarno Rajahalme { (ADDR).s6_addr32[0], (ADDR).s6_addr32[1], \ 2599dd7f890SJarno Rajahalme (ADDR).s6_addr32[2], (ADDR).s6_addr32[3] } 2609dd7f890SJarno Rajahalme 2619dd7f890SJarno Rajahalme int ovs_ct_put_key(const struct sw_flow_key *swkey, 2629dd7f890SJarno Rajahalme const struct sw_flow_key *output, struct sk_buff *skb) 2637f8a436eSJoe Stringer { 264316d4d78SJarno Rajahalme if (nla_put_u32(skb, OVS_KEY_ATTR_CT_STATE, output->ct_state)) 2657f8a436eSJoe Stringer return -EMSGSIZE; 2667f8a436eSJoe Stringer 2677f8a436eSJoe Stringer if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && 268316d4d78SJarno Rajahalme nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, output->ct_zone)) 2697f8a436eSJoe Stringer return -EMSGSIZE; 2707f8a436eSJoe Stringer 271182e3042SJoe Stringer if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && 2729dd7f890SJarno Rajahalme nla_put_u32(skb, OVS_KEY_ATTR_CT_MARK, output->ct.mark)) 273182e3042SJoe Stringer return -EMSGSIZE; 274182e3042SJoe Stringer 2759723e6abSValentin Rothberg if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && 2769dd7f890SJarno Rajahalme nla_put(skb, OVS_KEY_ATTR_CT_LABELS, sizeof(output->ct.labels), 2779dd7f890SJarno Rajahalme &output->ct.labels)) 278c2ac6673SJoe Stringer return -EMSGSIZE; 279c2ac6673SJoe Stringer 280316d4d78SJarno Rajahalme if (swkey->ct_orig_proto) { 2819dd7f890SJarno Rajahalme if (swkey->eth.type == htons(ETH_P_IP)) { 2829dd7f890SJarno Rajahalme struct ovs_key_ct_tuple_ipv4 orig = { 2839dd7f890SJarno Rajahalme output->ipv4.ct_orig.src, 2849dd7f890SJarno Rajahalme output->ipv4.ct_orig.dst, 2859dd7f890SJarno Rajahalme output->ct.orig_tp.src, 2869dd7f890SJarno Rajahalme 
output->ct.orig_tp.dst, 287316d4d78SJarno Rajahalme output->ct_orig_proto, 2889dd7f890SJarno Rajahalme }; 2899dd7f890SJarno Rajahalme if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4, 2909dd7f890SJarno Rajahalme sizeof(orig), &orig)) 2919dd7f890SJarno Rajahalme return -EMSGSIZE; 2929dd7f890SJarno Rajahalme } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 2939dd7f890SJarno Rajahalme struct ovs_key_ct_tuple_ipv6 orig = { 2949dd7f890SJarno Rajahalme IN6_ADDR_INITIALIZER(output->ipv6.ct_orig.src), 2959dd7f890SJarno Rajahalme IN6_ADDR_INITIALIZER(output->ipv6.ct_orig.dst), 2969dd7f890SJarno Rajahalme output->ct.orig_tp.src, 2979dd7f890SJarno Rajahalme output->ct.orig_tp.dst, 298316d4d78SJarno Rajahalme output->ct_orig_proto, 2999dd7f890SJarno Rajahalme }; 3009dd7f890SJarno Rajahalme if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6, 3019dd7f890SJarno Rajahalme sizeof(orig), &orig)) 3029dd7f890SJarno Rajahalme return -EMSGSIZE; 3039dd7f890SJarno Rajahalme } 3049dd7f890SJarno Rajahalme } 3059dd7f890SJarno Rajahalme 306182e3042SJoe Stringer return 0; 307182e3042SJoe Stringer } 308182e3042SJoe Stringer 3096ffcea79SJarno Rajahalme static int ovs_ct_set_mark(struct nf_conn *ct, struct sw_flow_key *key, 310182e3042SJoe Stringer u32 ct_mark, u32 mask) 311182e3042SJoe Stringer { 3120d5cdef8SJoe Stringer #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) 313182e3042SJoe Stringer u32 new_mark; 314182e3042SJoe Stringer 315182e3042SJoe Stringer new_mark = ct_mark | (ct->mark & ~(mask)); 316182e3042SJoe Stringer if (ct->mark != new_mark) { 317182e3042SJoe Stringer ct->mark = new_mark; 318193e3096SJarno Rajahalme if (nf_ct_is_confirmed(ct)) 319182e3042SJoe Stringer nf_conntrack_event_cache(IPCT_MARK, ct); 320182e3042SJoe Stringer key->ct.mark = new_mark; 321182e3042SJoe Stringer } 322182e3042SJoe Stringer 3237f8a436eSJoe Stringer return 0; 3240d5cdef8SJoe Stringer #else 3250d5cdef8SJoe Stringer return -ENOTSUPP; 3260d5cdef8SJoe Stringer #endif 3277f8a436eSJoe Stringer } 3287f8a436eSJoe 
Stringer 3296ffcea79SJarno Rajahalme static struct nf_conn_labels *ovs_ct_get_conn_labels(struct nf_conn *ct) 330c2ac6673SJoe Stringer { 331c2ac6673SJoe Stringer struct nf_conn_labels *cl; 332c2ac6673SJoe Stringer 333c2ac6673SJoe Stringer cl = nf_ct_labels_find(ct); 334c2ac6673SJoe Stringer if (!cl) { 335c2ac6673SJoe Stringer nf_ct_labels_ext_add(ct); 336c2ac6673SJoe Stringer cl = nf_ct_labels_find(ct); 337c2ac6673SJoe Stringer } 3386ffcea79SJarno Rajahalme 3396ffcea79SJarno Rajahalme return cl; 3406ffcea79SJarno Rajahalme } 3416ffcea79SJarno Rajahalme 3426ffcea79SJarno Rajahalme /* Initialize labels for a new, yet to be committed conntrack entry. Note that 3436ffcea79SJarno Rajahalme * since the new connection is not yet confirmed, and thus no-one else has 3442317c6b5SJarno Rajahalme * access to it's labels, we simply write them over. 3456ffcea79SJarno Rajahalme */ 3466ffcea79SJarno Rajahalme static int ovs_ct_init_labels(struct nf_conn *ct, struct sw_flow_key *key, 3476ffcea79SJarno Rajahalme const struct ovs_key_ct_labels *labels, 3486ffcea79SJarno Rajahalme const struct ovs_key_ct_labels *mask) 3496ffcea79SJarno Rajahalme { 35009aa98adSJarno Rajahalme struct nf_conn_labels *cl, *master_cl; 35109aa98adSJarno Rajahalme bool have_mask = labels_nonzero(mask); 35209aa98adSJarno Rajahalme 35309aa98adSJarno Rajahalme /* Inherit master's labels to the related connection? */ 35409aa98adSJarno Rajahalme master_cl = ct->master ? nf_ct_labels_find(ct->master) : NULL; 35509aa98adSJarno Rajahalme 35609aa98adSJarno Rajahalme if (!master_cl && !have_mask) 35709aa98adSJarno Rajahalme return 0; /* Nothing to do. */ 3586ffcea79SJarno Rajahalme 3596ffcea79SJarno Rajahalme cl = ovs_ct_get_conn_labels(ct); 360b87cec38SJarno Rajahalme if (!cl) 361c2ac6673SJoe Stringer return -ENOSPC; 362c2ac6673SJoe Stringer 36309aa98adSJarno Rajahalme /* Inherit the master's labels, if any. 
*/ 36409aa98adSJarno Rajahalme if (master_cl) 36509aa98adSJarno Rajahalme *cl = *master_cl; 36609aa98adSJarno Rajahalme 36709aa98adSJarno Rajahalme if (have_mask) { 36809aa98adSJarno Rajahalme u32 *dst = (u32 *)cl->bits; 36909aa98adSJarno Rajahalme int i; 37009aa98adSJarno Rajahalme 3716ffcea79SJarno Rajahalme for (i = 0; i < OVS_CT_LABELS_LEN_32; i++) 3726ffcea79SJarno Rajahalme dst[i] = (dst[i] & ~mask->ct_labels_32[i]) | 37309aa98adSJarno Rajahalme (labels->ct_labels_32[i] 37409aa98adSJarno Rajahalme & mask->ct_labels_32[i]); 37509aa98adSJarno Rajahalme } 3766ffcea79SJarno Rajahalme 3772317c6b5SJarno Rajahalme /* Labels are included in the IPCTNL_MSG_CT_NEW event only if the 378abd0a4f2SJarno Rajahalme * IPCT_LABEL bit is set in the event cache. 3792317c6b5SJarno Rajahalme */ 3802317c6b5SJarno Rajahalme nf_conntrack_event_cache(IPCT_LABEL, ct); 3812317c6b5SJarno Rajahalme 3826ffcea79SJarno Rajahalme memcpy(&key->ct.labels, cl->bits, OVS_CT_LABELS_LEN); 3836ffcea79SJarno Rajahalme 3846ffcea79SJarno Rajahalme return 0; 3856ffcea79SJarno Rajahalme } 3866ffcea79SJarno Rajahalme 3876ffcea79SJarno Rajahalme static int ovs_ct_set_labels(struct nf_conn *ct, struct sw_flow_key *key, 3886ffcea79SJarno Rajahalme const struct ovs_key_ct_labels *labels, 3896ffcea79SJarno Rajahalme const struct ovs_key_ct_labels *mask) 3906ffcea79SJarno Rajahalme { 3916ffcea79SJarno Rajahalme struct nf_conn_labels *cl; 3926ffcea79SJarno Rajahalme int err; 3936ffcea79SJarno Rajahalme 3946ffcea79SJarno Rajahalme cl = ovs_ct_get_conn_labels(ct); 3956ffcea79SJarno Rajahalme if (!cl) 3966ffcea79SJarno Rajahalme return -ENOSPC; 3976ffcea79SJarno Rajahalme 3986ffcea79SJarno Rajahalme err = nf_connlabels_replace(ct, labels->ct_labels_32, 399cb80d58fSJarno Rajahalme mask->ct_labels_32, 400cb80d58fSJarno Rajahalme OVS_CT_LABELS_LEN_32); 401c2ac6673SJoe Stringer if (err) 402c2ac6673SJoe Stringer return err; 403193e3096SJarno Rajahalme 4046ffcea79SJarno Rajahalme memcpy(&key->ct.labels, cl->bits, 
OVS_CT_LABELS_LEN); 405c2ac6673SJoe Stringer 406c2ac6673SJoe Stringer return 0; 407c2ac6673SJoe Stringer } 408c2ac6673SJoe Stringer 409cae3a262SJoe Stringer /* 'skb' should already be pulled to nh_ofs. */ 410cae3a262SJoe Stringer static int ovs_ct_helper(struct sk_buff *skb, u16 proto) 411cae3a262SJoe Stringer { 412cae3a262SJoe Stringer const struct nf_conntrack_helper *helper; 413cae3a262SJoe Stringer const struct nf_conn_help *help; 414cae3a262SJoe Stringer enum ip_conntrack_info ctinfo; 415cae3a262SJoe Stringer unsigned int protoff; 416cae3a262SJoe Stringer struct nf_conn *ct; 41705752523SJarno Rajahalme int err; 418cae3a262SJoe Stringer 419cae3a262SJoe Stringer ct = nf_ct_get(skb, &ctinfo); 420cae3a262SJoe Stringer if (!ct || ctinfo == IP_CT_RELATED_REPLY) 421cae3a262SJoe Stringer return NF_ACCEPT; 422cae3a262SJoe Stringer 423cae3a262SJoe Stringer help = nfct_help(ct); 424cae3a262SJoe Stringer if (!help) 425cae3a262SJoe Stringer return NF_ACCEPT; 426cae3a262SJoe Stringer 427cae3a262SJoe Stringer helper = rcu_dereference(help->helper); 428cae3a262SJoe Stringer if (!helper) 429cae3a262SJoe Stringer return NF_ACCEPT; 430cae3a262SJoe Stringer 431cae3a262SJoe Stringer switch (proto) { 432cae3a262SJoe Stringer case NFPROTO_IPV4: 433cae3a262SJoe Stringer protoff = ip_hdrlen(skb); 434cae3a262SJoe Stringer break; 435cae3a262SJoe Stringer case NFPROTO_IPV6: { 436cae3a262SJoe Stringer u8 nexthdr = ipv6_hdr(skb)->nexthdr; 437cae3a262SJoe Stringer __be16 frag_off; 438cc570605SJoe Stringer int ofs; 439cae3a262SJoe Stringer 440cc570605SJoe Stringer ofs = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, 441cc570605SJoe Stringer &frag_off); 442cc570605SJoe Stringer if (ofs < 0 || (frag_off & htons(~0x7)) != 0) { 443cae3a262SJoe Stringer pr_debug("proto header not found\n"); 444cae3a262SJoe Stringer return NF_ACCEPT; 445cae3a262SJoe Stringer } 446cc570605SJoe Stringer protoff = ofs; 447cae3a262SJoe Stringer break; 448cae3a262SJoe Stringer } 449cae3a262SJoe Stringer 
default: 450cae3a262SJoe Stringer WARN_ONCE(1, "helper invoked on non-IP family!"); 451cae3a262SJoe Stringer return NF_DROP; 452cae3a262SJoe Stringer } 453cae3a262SJoe Stringer 45405752523SJarno Rajahalme err = helper->help(skb, protoff, ct, ctinfo); 45505752523SJarno Rajahalme if (err != NF_ACCEPT) 45605752523SJarno Rajahalme return err; 45705752523SJarno Rajahalme 45805752523SJarno Rajahalme /* Adjust seqs after helper. This is needed due to some helpers (e.g., 45905752523SJarno Rajahalme * FTP with NAT) adusting the TCP payload size when mangling IP 46005752523SJarno Rajahalme * addresses and/or port numbers in the text-based control connection. 46105752523SJarno Rajahalme */ 46205752523SJarno Rajahalme if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && 46305752523SJarno Rajahalme !nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) 46405752523SJarno Rajahalme return NF_DROP; 46505752523SJarno Rajahalme return NF_ACCEPT; 466cae3a262SJoe Stringer } 467cae3a262SJoe Stringer 46874c16618SJoe Stringer /* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero 46974c16618SJoe Stringer * value if 'skb' is freed. 47074c16618SJoe Stringer */ 4717f8a436eSJoe Stringer static int handle_fragments(struct net *net, struct sw_flow_key *key, 4727f8a436eSJoe Stringer u16 zone, struct sk_buff *skb) 4737f8a436eSJoe Stringer { 4747f8a436eSJoe Stringer struct ovs_skb_cb ovs_cb = *OVS_CB(skb); 475daaa7d64SFlorian Westphal int err; 4767f8a436eSJoe Stringer 4777f8a436eSJoe Stringer if (key->eth.type == htons(ETH_P_IP)) { 4787f8a436eSJoe Stringer enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone; 4797f8a436eSJoe Stringer 4807f8a436eSJoe Stringer memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); 48119bcf9f2SEric W. 
Biederman err = ip_defrag(net, skb, user); 4827f8a436eSJoe Stringer if (err) 4837f8a436eSJoe Stringer return err; 4847f8a436eSJoe Stringer 4857f8a436eSJoe Stringer ovs_cb.mru = IPCB(skb)->frag_max_size; 4867f8a436eSJoe Stringer #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) 48774c16618SJoe Stringer } else if (key->eth.type == htons(ETH_P_IPV6)) { 4887f8a436eSJoe Stringer enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; 4897f8a436eSJoe Stringer 4907f8a436eSJoe Stringer memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); 491daaa7d64SFlorian Westphal err = nf_ct_frag6_gather(net, skb, user); 492f92a80a9SDaniele Di Proietto if (err) { 493f92a80a9SDaniele Di Proietto if (err != -EINPROGRESS) 494f92a80a9SDaniele Di Proietto kfree_skb(skb); 495daaa7d64SFlorian Westphal return err; 496f92a80a9SDaniele Di Proietto } 4977f8a436eSJoe Stringer 498daaa7d64SFlorian Westphal key->ip.proto = ipv6_hdr(skb)->nexthdr; 4997f8a436eSJoe Stringer ovs_cb.mru = IP6CB(skb)->frag_max_size; 5007f8a436eSJoe Stringer #endif 5017f8a436eSJoe Stringer } else { 50274c16618SJoe Stringer kfree_skb(skb); 5037f8a436eSJoe Stringer return -EPFNOSUPPORT; 5047f8a436eSJoe Stringer } 5057f8a436eSJoe Stringer 5067f8a436eSJoe Stringer key->ip.frag = OVS_FRAG_TYPE_NONE; 5077f8a436eSJoe Stringer skb_clear_hash(skb); 5087f8a436eSJoe Stringer skb->ignore_df = 1; 5097f8a436eSJoe Stringer *OVS_CB(skb) = ovs_cb; 5107f8a436eSJoe Stringer 5117f8a436eSJoe Stringer return 0; 5127f8a436eSJoe Stringer } 5137f8a436eSJoe Stringer 5147f8a436eSJoe Stringer static struct nf_conntrack_expect * 5157f8a436eSJoe Stringer ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone, 5167f8a436eSJoe Stringer u16 proto, const struct sk_buff *skb) 5177f8a436eSJoe Stringer { 5187f8a436eSJoe Stringer struct nf_conntrack_tuple tuple; 519cf5d7091SJarno Rajahalme struct nf_conntrack_expect *exp; 5207f8a436eSJoe Stringer 521a31f1adcSEric W. 
Biederman if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, net, &tuple)) 5227f8a436eSJoe Stringer return NULL; 523cf5d7091SJarno Rajahalme 524cf5d7091SJarno Rajahalme exp = __nf_ct_expect_find(net, zone, &tuple); 525cf5d7091SJarno Rajahalme if (exp) { 526cf5d7091SJarno Rajahalme struct nf_conntrack_tuple_hash *h; 527cf5d7091SJarno Rajahalme 528cf5d7091SJarno Rajahalme /* Delete existing conntrack entry, if it clashes with the 529cf5d7091SJarno Rajahalme * expectation. This can happen since conntrack ALGs do not 530cf5d7091SJarno Rajahalme * check for clashes between (new) expectations and existing 531cf5d7091SJarno Rajahalme * conntrack entries. nf_conntrack_in() will check the 532cf5d7091SJarno Rajahalme * expectations only if a conntrack entry can not be found, 533cf5d7091SJarno Rajahalme * which can lead to OVS finding the expectation (here) in the 534cf5d7091SJarno Rajahalme * init direction, but which will not be removed by the 535cf5d7091SJarno Rajahalme * nf_conntrack_in() call, if a matching conntrack entry is 536cf5d7091SJarno Rajahalme * found instead. In this case all init direction packets 537cf5d7091SJarno Rajahalme * would be reported as new related packets, while reply 538cf5d7091SJarno Rajahalme * direction packets would be reported as un-related 539cf5d7091SJarno Rajahalme * established packets. 540cf5d7091SJarno Rajahalme */ 541cf5d7091SJarno Rajahalme h = nf_conntrack_find_get(net, zone, &tuple); 542cf5d7091SJarno Rajahalme if (h) { 543cf5d7091SJarno Rajahalme struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); 544cf5d7091SJarno Rajahalme 545cf5d7091SJarno Rajahalme nf_ct_delete(ct, 0, 0); 546cf5d7091SJarno Rajahalme nf_conntrack_put(&ct->ct_general); 547cf5d7091SJarno Rajahalme } 548cf5d7091SJarno Rajahalme } 549cf5d7091SJarno Rajahalme 550cf5d7091SJarno Rajahalme return exp; 5517f8a436eSJoe Stringer } 5527f8a436eSJoe Stringer 553289f2253SJarno Rajahalme /* This replicates logic from nf_conntrack_core.c that is not exported. 
*/ 554289f2253SJarno Rajahalme static enum ip_conntrack_info 555289f2253SJarno Rajahalme ovs_ct_get_info(const struct nf_conntrack_tuple_hash *h) 556289f2253SJarno Rajahalme { 557289f2253SJarno Rajahalme const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); 558289f2253SJarno Rajahalme 559289f2253SJarno Rajahalme if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) 560289f2253SJarno Rajahalme return IP_CT_ESTABLISHED_REPLY; 561289f2253SJarno Rajahalme /* Once we've had two way comms, always ESTABLISHED. */ 562289f2253SJarno Rajahalme if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) 563289f2253SJarno Rajahalme return IP_CT_ESTABLISHED; 564289f2253SJarno Rajahalme if (test_bit(IPS_EXPECTED_BIT, &ct->status)) 565289f2253SJarno Rajahalme return IP_CT_RELATED; 566289f2253SJarno Rajahalme return IP_CT_NEW; 567289f2253SJarno Rajahalme } 568289f2253SJarno Rajahalme 569289f2253SJarno Rajahalme /* Find an existing connection which this packet belongs to without 570289f2253SJarno Rajahalme * re-attributing statistics or modifying the connection state. This allows an 5715e17da63SJarno Rajahalme * skb->_nfct lost due to an upcall to be recovered during actions execution. 572289f2253SJarno Rajahalme * 573289f2253SJarno Rajahalme * Must be called with rcu_read_lock. 574289f2253SJarno Rajahalme * 5755e17da63SJarno Rajahalme * On success, populates skb->_nfct and returns the connection. Returns NULL 5765e17da63SJarno Rajahalme * if there is no existing entry. 
577289f2253SJarno Rajahalme */ 578289f2253SJarno Rajahalme static struct nf_conn * 579289f2253SJarno Rajahalme ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone, 5809ff464dbSJarno Rajahalme u8 l3num, struct sk_buff *skb, bool natted) 581289f2253SJarno Rajahalme { 582b3480fe0SFlorian Westphal const struct nf_conntrack_l3proto *l3proto; 583b3480fe0SFlorian Westphal const struct nf_conntrack_l4proto *l4proto; 584289f2253SJarno Rajahalme struct nf_conntrack_tuple tuple; 585289f2253SJarno Rajahalme struct nf_conntrack_tuple_hash *h; 586289f2253SJarno Rajahalme struct nf_conn *ct; 587289f2253SJarno Rajahalme unsigned int dataoff; 588289f2253SJarno Rajahalme u8 protonum; 589289f2253SJarno Rajahalme 590289f2253SJarno Rajahalme l3proto = __nf_ct_l3proto_find(l3num); 591289f2253SJarno Rajahalme if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff, 592289f2253SJarno Rajahalme &protonum) <= 0) { 593289f2253SJarno Rajahalme pr_debug("ovs_ct_find_existing: Can't get protonum\n"); 594289f2253SJarno Rajahalme return NULL; 595289f2253SJarno Rajahalme } 596289f2253SJarno Rajahalme l4proto = __nf_ct_l4proto_find(l3num, protonum); 597289f2253SJarno Rajahalme if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num, 598289f2253SJarno Rajahalme protonum, net, &tuple, l3proto, l4proto)) { 599289f2253SJarno Rajahalme pr_debug("ovs_ct_find_existing: Can't get tuple\n"); 600289f2253SJarno Rajahalme return NULL; 601289f2253SJarno Rajahalme } 602289f2253SJarno Rajahalme 6039ff464dbSJarno Rajahalme /* Must invert the tuple if skb has been transformed by NAT. 
*/ 6049ff464dbSJarno Rajahalme if (natted) { 6059ff464dbSJarno Rajahalme struct nf_conntrack_tuple inverse; 6069ff464dbSJarno Rajahalme 6079ff464dbSJarno Rajahalme if (!nf_ct_invert_tuple(&inverse, &tuple, l3proto, l4proto)) { 6089ff464dbSJarno Rajahalme pr_debug("ovs_ct_find_existing: Inversion failed!\n"); 6099ff464dbSJarno Rajahalme return NULL; 6109ff464dbSJarno Rajahalme } 6119ff464dbSJarno Rajahalme tuple = inverse; 6129ff464dbSJarno Rajahalme } 6139ff464dbSJarno Rajahalme 614289f2253SJarno Rajahalme /* look for tuple match */ 615289f2253SJarno Rajahalme h = nf_conntrack_find_get(net, zone, &tuple); 616289f2253SJarno Rajahalme if (!h) 617289f2253SJarno Rajahalme return NULL; /* Not found. */ 618289f2253SJarno Rajahalme 619289f2253SJarno Rajahalme ct = nf_ct_tuplehash_to_ctrack(h); 620289f2253SJarno Rajahalme 6219ff464dbSJarno Rajahalme /* Inverted packet tuple matches the reverse direction conntrack tuple, 6229ff464dbSJarno Rajahalme * select the other tuplehash to get the right 'ctinfo' bits for this 6239ff464dbSJarno Rajahalme * packet. 6249ff464dbSJarno Rajahalme */ 6259ff464dbSJarno Rajahalme if (natted) 6269ff464dbSJarno Rajahalme h = &ct->tuplehash[!h->tuple.dst.dir]; 6279ff464dbSJarno Rajahalme 628c74454faSFlorian Westphal nf_ct_set(skb, ct, ovs_ct_get_info(h)); 629289f2253SJarno Rajahalme return ct; 630289f2253SJarno Rajahalme } 631289f2253SJarno Rajahalme 6328b97ac5bSGreg Rose static 6338b97ac5bSGreg Rose struct nf_conn *ovs_ct_executed(struct net *net, 6348b97ac5bSGreg Rose const struct sw_flow_key *key, 6358b97ac5bSGreg Rose const struct ovs_conntrack_info *info, 6368b97ac5bSGreg Rose struct sk_buff *skb, 6378b97ac5bSGreg Rose bool *ct_executed) 6388b97ac5bSGreg Rose { 6398b97ac5bSGreg Rose struct nf_conn *ct = NULL; 6408b97ac5bSGreg Rose 6418b97ac5bSGreg Rose /* If no ct, check if we have evidence that an existing conntrack entry 6428b97ac5bSGreg Rose * might be found for this skb. 
This happens when we lose a skb->_nfct 6438b97ac5bSGreg Rose * due to an upcall, or if the direction is being forced. If the 6448b97ac5bSGreg Rose * connection was not confirmed, it is not cached and needs to be run 6458b97ac5bSGreg Rose * through conntrack again. 6468b97ac5bSGreg Rose */ 6478b97ac5bSGreg Rose *ct_executed = (key->ct_state & OVS_CS_F_TRACKED) && 6488b97ac5bSGreg Rose !(key->ct_state & OVS_CS_F_INVALID) && 6498b97ac5bSGreg Rose (key->ct_zone == info->zone.id); 6508b97ac5bSGreg Rose 6518b97ac5bSGreg Rose if (*ct_executed || (!key->ct_state && info->force)) { 6528b97ac5bSGreg Rose ct = ovs_ct_find_existing(net, &info->zone, info->family, skb, 6538b97ac5bSGreg Rose !!(key->ct_state & 6548b97ac5bSGreg Rose OVS_CS_F_NAT_MASK)); 6558b97ac5bSGreg Rose } 6568b97ac5bSGreg Rose 6578b97ac5bSGreg Rose return ct; 6588b97ac5bSGreg Rose } 6598b97ac5bSGreg Rose 6605e17da63SJarno Rajahalme /* Determine whether skb->_nfct is equal to the result of conntrack lookup. */ 661289f2253SJarno Rajahalme static bool skb_nfct_cached(struct net *net, 662289f2253SJarno Rajahalme const struct sw_flow_key *key, 663289f2253SJarno Rajahalme const struct ovs_conntrack_info *info, 664289f2253SJarno Rajahalme struct sk_buff *skb) 6657f8a436eSJoe Stringer { 6667f8a436eSJoe Stringer enum ip_conntrack_info ctinfo; 6677f8a436eSJoe Stringer struct nf_conn *ct; 6688b97ac5bSGreg Rose bool ct_executed = true; 6697f8a436eSJoe Stringer 6707f8a436eSJoe Stringer ct = nf_ct_get(skb, &ctinfo); 6718b97ac5bSGreg Rose if (!ct) 6728b97ac5bSGreg Rose ct = ovs_ct_executed(net, key, info, skb, &ct_executed); 6738b97ac5bSGreg Rose 674dd41d33fSJarno Rajahalme if (ct) 675dd41d33fSJarno Rajahalme nf_ct_get(skb, &ctinfo); 6768b97ac5bSGreg Rose else 6777f8a436eSJoe Stringer return false; 6788b97ac5bSGreg Rose 6797f8a436eSJoe Stringer if (!net_eq(net, read_pnet(&ct->ct_net))) 6807f8a436eSJoe Stringer return false; 6817f8a436eSJoe Stringer if (!nf_ct_zone_equal_any(info->ct, nf_ct_zone(ct))) 6827f8a436eSJoe 
Stringer return false; 683cae3a262SJoe Stringer if (info->helper) { 684cae3a262SJoe Stringer struct nf_conn_help *help; 685cae3a262SJoe Stringer 686cae3a262SJoe Stringer help = nf_ct_ext_find(ct, NF_CT_EXT_HELPER); 687cae3a262SJoe Stringer if (help && rcu_access_pointer(help->helper) != info->helper) 688cae3a262SJoe Stringer return false; 689cae3a262SJoe Stringer } 690dd41d33fSJarno Rajahalme /* Force conntrack entry direction to the current packet? */ 691dd41d33fSJarno Rajahalme if (info->force && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) { 692dd41d33fSJarno Rajahalme /* Delete the conntrack entry if confirmed, else just release 693dd41d33fSJarno Rajahalme * the reference. 694dd41d33fSJarno Rajahalme */ 695dd41d33fSJarno Rajahalme if (nf_ct_is_confirmed(ct)) 696dd41d33fSJarno Rajahalme nf_ct_delete(ct, 0, 0); 697b768b16dSJarno Rajahalme 698dd41d33fSJarno Rajahalme nf_conntrack_put(&ct->ct_general); 699dd41d33fSJarno Rajahalme nf_ct_set(skb, NULL, 0); 700dd41d33fSJarno Rajahalme return false; 701dd41d33fSJarno Rajahalme } 7027f8a436eSJoe Stringer 7038b97ac5bSGreg Rose return ct_executed; 7047f8a436eSJoe Stringer } 7057f8a436eSJoe Stringer 70605752523SJarno Rajahalme #ifdef CONFIG_NF_NAT_NEEDED 70705752523SJarno Rajahalme /* Modelled after nf_nat_ipv[46]_fn(). 70805752523SJarno Rajahalme * range is only used for new, uninitialized NAT state. 70905752523SJarno Rajahalme * Returns either NF_ACCEPT or NF_DROP. 
71005752523SJarno Rajahalme */ 71105752523SJarno Rajahalme static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, 71205752523SJarno Rajahalme enum ip_conntrack_info ctinfo, 71305752523SJarno Rajahalme const struct nf_nat_range *range, 71405752523SJarno Rajahalme enum nf_nat_manip_type maniptype) 71505752523SJarno Rajahalme { 71605752523SJarno Rajahalme int hooknum, nh_off, err = NF_ACCEPT; 71705752523SJarno Rajahalme 71805752523SJarno Rajahalme nh_off = skb_network_offset(skb); 71975f01a4cSLance Richardson skb_pull_rcsum(skb, nh_off); 72005752523SJarno Rajahalme 72105752523SJarno Rajahalme /* See HOOK2MANIP(). */ 72205752523SJarno Rajahalme if (maniptype == NF_NAT_MANIP_SRC) 72305752523SJarno Rajahalme hooknum = NF_INET_LOCAL_IN; /* Source NAT */ 72405752523SJarno Rajahalme else 72505752523SJarno Rajahalme hooknum = NF_INET_LOCAL_OUT; /* Destination NAT */ 72605752523SJarno Rajahalme 72705752523SJarno Rajahalme switch (ctinfo) { 72805752523SJarno Rajahalme case IP_CT_RELATED: 72905752523SJarno Rajahalme case IP_CT_RELATED_REPLY: 73099b7248eSArnd Bergmann if (IS_ENABLED(CONFIG_NF_NAT_IPV4) && 73199b7248eSArnd Bergmann skb->protocol == htons(ETH_P_IP) && 73205752523SJarno Rajahalme ip_hdr(skb)->protocol == IPPROTO_ICMP) { 73305752523SJarno Rajahalme if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, 73405752523SJarno Rajahalme hooknum)) 73505752523SJarno Rajahalme err = NF_DROP; 73605752523SJarno Rajahalme goto push; 73799b7248eSArnd Bergmann } else if (IS_ENABLED(CONFIG_NF_NAT_IPV6) && 73899b7248eSArnd Bergmann skb->protocol == htons(ETH_P_IPV6)) { 73905752523SJarno Rajahalme __be16 frag_off; 74005752523SJarno Rajahalme u8 nexthdr = ipv6_hdr(skb)->nexthdr; 74105752523SJarno Rajahalme int hdrlen = ipv6_skip_exthdr(skb, 74205752523SJarno Rajahalme sizeof(struct ipv6hdr), 74305752523SJarno Rajahalme &nexthdr, &frag_off); 74405752523SJarno Rajahalme 74505752523SJarno Rajahalme if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { 74605752523SJarno Rajahalme 
if (!nf_nat_icmpv6_reply_translation(skb, ct, 74705752523SJarno Rajahalme ctinfo, 74805752523SJarno Rajahalme hooknum, 74905752523SJarno Rajahalme hdrlen)) 75005752523SJarno Rajahalme err = NF_DROP; 75105752523SJarno Rajahalme goto push; 75205752523SJarno Rajahalme } 75305752523SJarno Rajahalme } 75405752523SJarno Rajahalme /* Non-ICMP, fall thru to initialize if needed. */ 75505752523SJarno Rajahalme case IP_CT_NEW: 75605752523SJarno Rajahalme /* Seen it before? This can happen for loopback, retrans, 75705752523SJarno Rajahalme * or local packets. 75805752523SJarno Rajahalme */ 75905752523SJarno Rajahalme if (!nf_nat_initialized(ct, maniptype)) { 76005752523SJarno Rajahalme /* Initialize according to the NAT action. */ 76105752523SJarno Rajahalme err = (range && range->flags & NF_NAT_RANGE_MAP_IPS) 76205752523SJarno Rajahalme /* Action is set up to establish a new 76305752523SJarno Rajahalme * mapping. 76405752523SJarno Rajahalme */ 76505752523SJarno Rajahalme ? nf_nat_setup_info(ct, range, maniptype) 76605752523SJarno Rajahalme : nf_nat_alloc_null_binding(ct, hooknum); 76705752523SJarno Rajahalme if (err != NF_ACCEPT) 76805752523SJarno Rajahalme goto push; 76905752523SJarno Rajahalme } 77005752523SJarno Rajahalme break; 77105752523SJarno Rajahalme 77205752523SJarno Rajahalme case IP_CT_ESTABLISHED: 77305752523SJarno Rajahalme case IP_CT_ESTABLISHED_REPLY: 77405752523SJarno Rajahalme break; 77505752523SJarno Rajahalme 77605752523SJarno Rajahalme default: 77705752523SJarno Rajahalme err = NF_DROP; 77805752523SJarno Rajahalme goto push; 77905752523SJarno Rajahalme } 78005752523SJarno Rajahalme 78105752523SJarno Rajahalme err = nf_nat_packet(ct, ctinfo, hooknum, skb); 78205752523SJarno Rajahalme push: 78305752523SJarno Rajahalme skb_push(skb, nh_off); 78475f01a4cSLance Richardson skb_postpush_rcsum(skb, skb->data, nh_off); 78505752523SJarno Rajahalme 78605752523SJarno Rajahalme return err; 78705752523SJarno Rajahalme } 78805752523SJarno Rajahalme 78905752523SJarno 
Rajahalme static void ovs_nat_update_key(struct sw_flow_key *key, 79005752523SJarno Rajahalme const struct sk_buff *skb, 79105752523SJarno Rajahalme enum nf_nat_manip_type maniptype) 79205752523SJarno Rajahalme { 79305752523SJarno Rajahalme if (maniptype == NF_NAT_MANIP_SRC) { 79405752523SJarno Rajahalme __be16 src; 79505752523SJarno Rajahalme 796316d4d78SJarno Rajahalme key->ct_state |= OVS_CS_F_SRC_NAT; 79705752523SJarno Rajahalme if (key->eth.type == htons(ETH_P_IP)) 79805752523SJarno Rajahalme key->ipv4.addr.src = ip_hdr(skb)->saddr; 79905752523SJarno Rajahalme else if (key->eth.type == htons(ETH_P_IPV6)) 80005752523SJarno Rajahalme memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr, 80105752523SJarno Rajahalme sizeof(key->ipv6.addr.src)); 80205752523SJarno Rajahalme else 80305752523SJarno Rajahalme return; 80405752523SJarno Rajahalme 80505752523SJarno Rajahalme if (key->ip.proto == IPPROTO_UDP) 80605752523SJarno Rajahalme src = udp_hdr(skb)->source; 80705752523SJarno Rajahalme else if (key->ip.proto == IPPROTO_TCP) 80805752523SJarno Rajahalme src = tcp_hdr(skb)->source; 80905752523SJarno Rajahalme else if (key->ip.proto == IPPROTO_SCTP) 81005752523SJarno Rajahalme src = sctp_hdr(skb)->source; 81105752523SJarno Rajahalme else 81205752523SJarno Rajahalme return; 81305752523SJarno Rajahalme 81405752523SJarno Rajahalme key->tp.src = src; 81505752523SJarno Rajahalme } else { 81605752523SJarno Rajahalme __be16 dst; 81705752523SJarno Rajahalme 818316d4d78SJarno Rajahalme key->ct_state |= OVS_CS_F_DST_NAT; 81905752523SJarno Rajahalme if (key->eth.type == htons(ETH_P_IP)) 82005752523SJarno Rajahalme key->ipv4.addr.dst = ip_hdr(skb)->daddr; 82105752523SJarno Rajahalme else if (key->eth.type == htons(ETH_P_IPV6)) 82205752523SJarno Rajahalme memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr, 82305752523SJarno Rajahalme sizeof(key->ipv6.addr.dst)); 82405752523SJarno Rajahalme else 82505752523SJarno Rajahalme return; 82605752523SJarno Rajahalme 82705752523SJarno Rajahalme if 
(key->ip.proto == IPPROTO_UDP) 82805752523SJarno Rajahalme dst = udp_hdr(skb)->dest; 82905752523SJarno Rajahalme else if (key->ip.proto == IPPROTO_TCP) 83005752523SJarno Rajahalme dst = tcp_hdr(skb)->dest; 83105752523SJarno Rajahalme else if (key->ip.proto == IPPROTO_SCTP) 83205752523SJarno Rajahalme dst = sctp_hdr(skb)->dest; 83305752523SJarno Rajahalme else 83405752523SJarno Rajahalme return; 83505752523SJarno Rajahalme 83605752523SJarno Rajahalme key->tp.dst = dst; 83705752523SJarno Rajahalme } 83805752523SJarno Rajahalme } 83905752523SJarno Rajahalme 84005752523SJarno Rajahalme /* Returns NF_DROP if the packet should be dropped, NF_ACCEPT otherwise. */ 84105752523SJarno Rajahalme static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, 84205752523SJarno Rajahalme const struct ovs_conntrack_info *info, 84305752523SJarno Rajahalme struct sk_buff *skb, struct nf_conn *ct, 84405752523SJarno Rajahalme enum ip_conntrack_info ctinfo) 84505752523SJarno Rajahalme { 84605752523SJarno Rajahalme enum nf_nat_manip_type maniptype; 84705752523SJarno Rajahalme int err; 84805752523SJarno Rajahalme 84905752523SJarno Rajahalme /* Add NAT extension if not confirmed yet. */ 85005752523SJarno Rajahalme if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct)) 85105752523SJarno Rajahalme return NF_ACCEPT; /* Can't NAT. */ 85205752523SJarno Rajahalme 85305752523SJarno Rajahalme /* Determine NAT type. 85405752523SJarno Rajahalme * Check if the NAT type can be deduced from the tracked connection. 8555745b0beSJarno Rajahalme * Make sure new expected connections (IP_CT_RELATED) are NATted only 8565745b0beSJarno Rajahalme * when committing. 85705752523SJarno Rajahalme */ 85805752523SJarno Rajahalme if (info->nat & OVS_CT_NAT && ctinfo != IP_CT_NEW && 85905752523SJarno Rajahalme ct->status & IPS_NAT_MASK && 8605745b0beSJarno Rajahalme (ctinfo != IP_CT_RELATED || info->commit)) { 86105752523SJarno Rajahalme /* NAT an established or related connection like before. 
*/ 86205752523SJarno Rajahalme if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) 86305752523SJarno Rajahalme /* This is the REPLY direction for a connection 86405752523SJarno Rajahalme * for which NAT was applied in the forward 86505752523SJarno Rajahalme * direction. Do the reverse NAT. 86605752523SJarno Rajahalme */ 86705752523SJarno Rajahalme maniptype = ct->status & IPS_SRC_NAT 86805752523SJarno Rajahalme ? NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC; 86905752523SJarno Rajahalme else 87005752523SJarno Rajahalme maniptype = ct->status & IPS_SRC_NAT 87105752523SJarno Rajahalme ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST; 87205752523SJarno Rajahalme } else if (info->nat & OVS_CT_SRC_NAT) { 87305752523SJarno Rajahalme maniptype = NF_NAT_MANIP_SRC; 87405752523SJarno Rajahalme } else if (info->nat & OVS_CT_DST_NAT) { 87505752523SJarno Rajahalme maniptype = NF_NAT_MANIP_DST; 87605752523SJarno Rajahalme } else { 87705752523SJarno Rajahalme return NF_ACCEPT; /* Connection is not NATed. */ 87805752523SJarno Rajahalme } 87905752523SJarno Rajahalme err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype); 88005752523SJarno Rajahalme 88105752523SJarno Rajahalme /* Mark NAT done if successful and update the flow key. 
*/ 88205752523SJarno Rajahalme if (err == NF_ACCEPT) 88305752523SJarno Rajahalme ovs_nat_update_key(key, skb, maniptype); 88405752523SJarno Rajahalme 88505752523SJarno Rajahalme return err; 88605752523SJarno Rajahalme } 88705752523SJarno Rajahalme #else /* !CONFIG_NF_NAT_NEEDED */ 88805752523SJarno Rajahalme static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, 88905752523SJarno Rajahalme const struct ovs_conntrack_info *info, 89005752523SJarno Rajahalme struct sk_buff *skb, struct nf_conn *ct, 89105752523SJarno Rajahalme enum ip_conntrack_info ctinfo) 89205752523SJarno Rajahalme { 89305752523SJarno Rajahalme return NF_ACCEPT; 89405752523SJarno Rajahalme } 89505752523SJarno Rajahalme #endif 89605752523SJarno Rajahalme 8979f13ded8SJarno Rajahalme /* Pass 'skb' through conntrack in 'net', using zone configured in 'info', if 898394e910eSJarno Rajahalme * not done already. Update key with new CT state after passing the packet 899394e910eSJarno Rajahalme * through conntrack. 9005e17da63SJarno Rajahalme * Note that if the packet is deemed invalid by conntrack, skb->_nfct will be 9019f13ded8SJarno Rajahalme * set to NULL and 0 will be returned. 9029f13ded8SJarno Rajahalme */ 9034f0909eeSJoe Stringer static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, 9047f8a436eSJoe Stringer const struct ovs_conntrack_info *info, 9057f8a436eSJoe Stringer struct sk_buff *skb) 9067f8a436eSJoe Stringer { 9077f8a436eSJoe Stringer /* If we are recirculating packets to match on conntrack fields and 9087f8a436eSJoe Stringer * committing with a separate conntrack action, then we don't need to 9097f8a436eSJoe Stringer * actually run the packet through conntrack twice unless it's for a 9107f8a436eSJoe Stringer * different zone. 
9117f8a436eSJoe Stringer */ 91228b6e0c1SJarno Rajahalme bool cached = skb_nfct_cached(net, key, info, skb); 91328b6e0c1SJarno Rajahalme enum ip_conntrack_info ctinfo; 91428b6e0c1SJarno Rajahalme struct nf_conn *ct; 91528b6e0c1SJarno Rajahalme 91628b6e0c1SJarno Rajahalme if (!cached) { 9177f8a436eSJoe Stringer struct nf_conn *tmpl = info->ct; 9185b6b9293SJarno Rajahalme int err; 9197f8a436eSJoe Stringer 9207f8a436eSJoe Stringer /* Associate skb with specified zone. */ 9217f8a436eSJoe Stringer if (tmpl) { 922cb9c6836SFlorian Westphal if (skb_nfct(skb)) 923cb9c6836SFlorian Westphal nf_conntrack_put(skb_nfct(skb)); 9247f8a436eSJoe Stringer nf_conntrack_get(&tmpl->ct_general); 925c74454faSFlorian Westphal nf_ct_set(skb, tmpl, IP_CT_NEW); 9267f8a436eSJoe Stringer } 9277f8a436eSJoe Stringer 9285b6b9293SJarno Rajahalme err = nf_conntrack_in(net, info->family, 9295b6b9293SJarno Rajahalme NF_INET_PRE_ROUTING, skb); 9305b6b9293SJarno Rajahalme if (err != NF_ACCEPT) 9317f8a436eSJoe Stringer return -ENOENT; 932cae3a262SJoe Stringer 93305752523SJarno Rajahalme /* Clear CT state NAT flags to mark that we have not yet done 93405752523SJarno Rajahalme * NAT after the nf_conntrack_in() call. We can actually clear 93505752523SJarno Rajahalme * the whole state, as it will be re-initialized below. 93605752523SJarno Rajahalme */ 937316d4d78SJarno Rajahalme key->ct_state = 0; 93805752523SJarno Rajahalme 93905752523SJarno Rajahalme /* Update the key, but keep the NAT flags. */ 94005752523SJarno Rajahalme ovs_ct_update_key(skb, info, key, true, true); 94105752523SJarno Rajahalme } 94205752523SJarno Rajahalme 94305752523SJarno Rajahalme ct = nf_ct_get(skb, &ctinfo); 94405752523SJarno Rajahalme if (ct) { 94505752523SJarno Rajahalme /* Packets starting a new connection must be NATted before the 94605752523SJarno Rajahalme * helper, so that the helper knows about the NAT. 
We enforce 94705752523SJarno Rajahalme * this by delaying both NAT and helper calls for unconfirmed 94805752523SJarno Rajahalme * connections until the committing CT action. For later 94905752523SJarno Rajahalme * packets NAT and Helper may be called in either order. 95005752523SJarno Rajahalme * 95105752523SJarno Rajahalme * NAT will be done only if the CT action has NAT, and only 95205752523SJarno Rajahalme * once per packet (per zone), as guarded by the NAT bits in 953316d4d78SJarno Rajahalme * the key->ct_state. 95405752523SJarno Rajahalme */ 955316d4d78SJarno Rajahalme if (info->nat && !(key->ct_state & OVS_CS_F_NAT_MASK) && 95605752523SJarno Rajahalme (nf_ct_is_confirmed(ct) || info->commit) && 95705752523SJarno Rajahalme ovs_ct_nat(net, key, info, skb, ct, ctinfo) != NF_ACCEPT) { 95805752523SJarno Rajahalme return -EINVAL; 95928b6e0c1SJarno Rajahalme } 960394e910eSJarno Rajahalme 96116ec3d4fSJoe Stringer /* Userspace may decide to perform a ct lookup without a helper 96216ec3d4fSJoe Stringer * specified followed by a (recirculate and) commit with one. 96316ec3d4fSJoe Stringer * Therefore, for unconfirmed connections which we will commit, 96416ec3d4fSJoe Stringer * we need to attach the helper here. 96516ec3d4fSJoe Stringer */ 96616ec3d4fSJoe Stringer if (!nf_ct_is_confirmed(ct) && info->commit && 96716ec3d4fSJoe Stringer info->helper && !nfct_help(ct)) { 96816ec3d4fSJoe Stringer int err = __nf_ct_try_assign_helper(ct, info->ct, 96916ec3d4fSJoe Stringer GFP_ATOMIC); 97016ec3d4fSJoe Stringer if (err) 97116ec3d4fSJoe Stringer return err; 97216ec3d4fSJoe Stringer } 97316ec3d4fSJoe Stringer 97428b6e0c1SJarno Rajahalme /* Call the helper only if: 97505752523SJarno Rajahalme * - nf_conntrack_in() was executed above ("!cached") for a 97605752523SJarno Rajahalme * confirmed connection, or 97728b6e0c1SJarno Rajahalme * - When committing an unconfirmed connection. 97828b6e0c1SJarno Rajahalme */ 97905752523SJarno Rajahalme if ((nf_ct_is_confirmed(ct) ? 
!cached : info->commit) && 98028b6e0c1SJarno Rajahalme ovs_ct_helper(skb, info->family) != NF_ACCEPT) { 981cae3a262SJoe Stringer return -EINVAL; 982cae3a262SJoe Stringer } 98305752523SJarno Rajahalme } 9847f8a436eSJoe Stringer 9857f8a436eSJoe Stringer return 0; 9867f8a436eSJoe Stringer } 9877f8a436eSJoe Stringer 9887f8a436eSJoe Stringer /* Lookup connection and read fields into key. */ 9897f8a436eSJoe Stringer static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key, 9907f8a436eSJoe Stringer const struct ovs_conntrack_info *info, 9917f8a436eSJoe Stringer struct sk_buff *skb) 9927f8a436eSJoe Stringer { 9937f8a436eSJoe Stringer struct nf_conntrack_expect *exp; 9947f8a436eSJoe Stringer 9959f13ded8SJarno Rajahalme /* If we pass an expected packet through nf_conntrack_in() the 9969f13ded8SJarno Rajahalme * expectation is typically removed, but the packet could still be 9979f13ded8SJarno Rajahalme * lost in upcall processing. To prevent this from happening we 9989f13ded8SJarno Rajahalme * perform an explicit expectation lookup. Expected connections are 9999f13ded8SJarno Rajahalme * always new, and will be passed through conntrack only when they are 10009f13ded8SJarno Rajahalme * committed, as it is OK to remove the expectation at that time. 10019f13ded8SJarno Rajahalme */ 10027f8a436eSJoe Stringer exp = ovs_ct_expect_find(net, &info->zone, info->family, skb); 10037f8a436eSJoe Stringer if (exp) { 10047f8a436eSJoe Stringer u8 state; 10057f8a436eSJoe Stringer 100605752523SJarno Rajahalme /* NOTE: New connections are NATted and Helped only when 100705752523SJarno Rajahalme * committed, so we are not calling into NAT here. 
100805752523SJarno Rajahalme */ 10097f8a436eSJoe Stringer state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED; 1010182e3042SJoe Stringer __ovs_ct_update_key(key, state, &info->zone, exp->master); 1011d913d3a7SSamuel Gauthier } else { 1012d913d3a7SSamuel Gauthier struct nf_conn *ct; 1013d913d3a7SSamuel Gauthier int err; 1014d913d3a7SSamuel Gauthier 1015d913d3a7SSamuel Gauthier err = __ovs_ct_lookup(net, key, info, skb); 1016d913d3a7SSamuel Gauthier if (err) 1017d913d3a7SSamuel Gauthier return err; 1018d913d3a7SSamuel Gauthier 1019cb9c6836SFlorian Westphal ct = (struct nf_conn *)skb_nfct(skb); 1020d913d3a7SSamuel Gauthier if (ct) 1021d913d3a7SSamuel Gauthier nf_ct_deliver_cached_events(ct); 1022d913d3a7SSamuel Gauthier } 10237f8a436eSJoe Stringer 10247f8a436eSJoe Stringer return 0; 10257f8a436eSJoe Stringer } 10267f8a436eSJoe Stringer 102733db4125SJoe Stringer static bool labels_nonzero(const struct ovs_key_ct_labels *labels) 1028c2ac6673SJoe Stringer { 1029c2ac6673SJoe Stringer size_t i; 1030c2ac6673SJoe Stringer 1031cb80d58fSJarno Rajahalme for (i = 0; i < OVS_CT_LABELS_LEN_32; i++) 1032cb80d58fSJarno Rajahalme if (labels->ct_labels_32[i]) 1033c2ac6673SJoe Stringer return true; 1034c2ac6673SJoe Stringer 1035c2ac6673SJoe Stringer return false; 1036c2ac6673SJoe Stringer } 1037c2ac6673SJoe Stringer 10387d904c7bSJarno Rajahalme /* Lookup connection and confirm if unconfirmed. 
*/ 10397d904c7bSJarno Rajahalme static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, 10407d904c7bSJarno Rajahalme const struct ovs_conntrack_info *info, 10417d904c7bSJarno Rajahalme struct sk_buff *skb) 10427d904c7bSJarno Rajahalme { 10436ffcea79SJarno Rajahalme enum ip_conntrack_info ctinfo; 10446ffcea79SJarno Rajahalme struct nf_conn *ct; 10457d904c7bSJarno Rajahalme int err; 10467d904c7bSJarno Rajahalme 10477d904c7bSJarno Rajahalme err = __ovs_ct_lookup(net, key, info, skb); 10487d904c7bSJarno Rajahalme if (err) 10497d904c7bSJarno Rajahalme return err; 10507d904c7bSJarno Rajahalme 10516ffcea79SJarno Rajahalme /* The connection could be invalid, in which case this is a no-op.*/ 10526ffcea79SJarno Rajahalme ct = nf_ct_get(skb, &ctinfo); 10536ffcea79SJarno Rajahalme if (!ct) 10546ffcea79SJarno Rajahalme return 0; 10556ffcea79SJarno Rajahalme 105612064551SJarno Rajahalme /* Set the conntrack event mask if given. NEW and DELETE events have 105712064551SJarno Rajahalme * their own groups, but the NFNLGRP_CONNTRACK_UPDATE group listener 105812064551SJarno Rajahalme * typically would receive many kinds of updates. Setting the event 105912064551SJarno Rajahalme * mask allows those events to be filtered. The set event mask will 106012064551SJarno Rajahalme * remain in effect for the lifetime of the connection unless changed 106112064551SJarno Rajahalme * by a further CT action with both the commit flag and the eventmask 106212064551SJarno Rajahalme * option. 
*/ 106312064551SJarno Rajahalme if (info->have_eventmask) { 106412064551SJarno Rajahalme struct nf_conntrack_ecache *cache = nf_ct_ecache_find(ct); 106512064551SJarno Rajahalme 106612064551SJarno Rajahalme if (cache) 106712064551SJarno Rajahalme cache->ctmask = info->eventmask; 106812064551SJarno Rajahalme } 106912064551SJarno Rajahalme 10707d904c7bSJarno Rajahalme /* Apply changes before confirming the connection so that the initial 10717d904c7bSJarno Rajahalme * conntrack NEW netlink event carries the values given in the CT 10727d904c7bSJarno Rajahalme * action. 10737d904c7bSJarno Rajahalme */ 10747d904c7bSJarno Rajahalme if (info->mark.mask) { 10756ffcea79SJarno Rajahalme err = ovs_ct_set_mark(ct, key, info->mark.value, 10767d904c7bSJarno Rajahalme info->mark.mask); 10777d904c7bSJarno Rajahalme if (err) 10787d904c7bSJarno Rajahalme return err; 10797d904c7bSJarno Rajahalme } 108009aa98adSJarno Rajahalme if (!nf_ct_is_confirmed(ct)) { 10816ffcea79SJarno Rajahalme err = ovs_ct_init_labels(ct, key, &info->labels.value, 10826ffcea79SJarno Rajahalme &info->labels.mask); 108309aa98adSJarno Rajahalme if (err) 108409aa98adSJarno Rajahalme return err; 108509aa98adSJarno Rajahalme } else if (labels_nonzero(&info->labels.mask)) { 10866ffcea79SJarno Rajahalme err = ovs_ct_set_labels(ct, key, &info->labels.value, 10877d904c7bSJarno Rajahalme &info->labels.mask); 10887d904c7bSJarno Rajahalme if (err) 10897d904c7bSJarno Rajahalme return err; 10907d904c7bSJarno Rajahalme } 10917d904c7bSJarno Rajahalme /* This will take care of sending queued events even if the connection 10927d904c7bSJarno Rajahalme * is already confirmed. 
10937d904c7bSJarno Rajahalme */ 10947d904c7bSJarno Rajahalme if (nf_conntrack_confirm(skb) != NF_ACCEPT) 10957d904c7bSJarno Rajahalme return -EINVAL; 10967d904c7bSJarno Rajahalme 10977d904c7bSJarno Rajahalme return 0; 10987d904c7bSJarno Rajahalme } 10997d904c7bSJarno Rajahalme 110074c16618SJoe Stringer /* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero 110174c16618SJoe Stringer * value if 'skb' is freed. 110274c16618SJoe Stringer */ 11037f8a436eSJoe Stringer int ovs_ct_execute(struct net *net, struct sk_buff *skb, 11047f8a436eSJoe Stringer struct sw_flow_key *key, 11057f8a436eSJoe Stringer const struct ovs_conntrack_info *info) 11067f8a436eSJoe Stringer { 11077f8a436eSJoe Stringer int nh_ofs; 11087f8a436eSJoe Stringer int err; 11097f8a436eSJoe Stringer 11107f8a436eSJoe Stringer /* The conntrack module expects to be working at L3. */ 11117f8a436eSJoe Stringer nh_ofs = skb_network_offset(skb); 111275f01a4cSLance Richardson skb_pull_rcsum(skb, nh_ofs); 11137f8a436eSJoe Stringer 11147f8a436eSJoe Stringer if (key->ip.frag != OVS_FRAG_TYPE_NONE) { 11157f8a436eSJoe Stringer err = handle_fragments(net, key, info->zone.id, skb); 11167f8a436eSJoe Stringer if (err) 11177f8a436eSJoe Stringer return err; 11187f8a436eSJoe Stringer } 11197f8a436eSJoe Stringer 1120ab38a7b5SJoe Stringer if (info->commit) 11217d904c7bSJarno Rajahalme err = ovs_ct_commit(net, key, info, skb); 11227f8a436eSJoe Stringer else 11237f8a436eSJoe Stringer err = ovs_ct_lookup(net, key, info, skb); 11247f8a436eSJoe Stringer 11257f8a436eSJoe Stringer skb_push(skb, nh_ofs); 112675f01a4cSLance Richardson skb_postpush_rcsum(skb, skb->data, nh_ofs); 112774c16618SJoe Stringer if (err) 112874c16618SJoe Stringer kfree_skb(skb); 11297f8a436eSJoe Stringer return err; 11307f8a436eSJoe Stringer } 11317f8a436eSJoe Stringer 1132cae3a262SJoe Stringer static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name, 1133cae3a262SJoe Stringer const struct sw_flow_key *key, bool log) 
1134cae3a262SJoe Stringer { 1135cae3a262SJoe Stringer struct nf_conntrack_helper *helper; 1136cae3a262SJoe Stringer struct nf_conn_help *help; 1137cae3a262SJoe Stringer 1138cae3a262SJoe Stringer helper = nf_conntrack_helper_try_module_get(name, info->family, 1139cae3a262SJoe Stringer key->ip.proto); 1140cae3a262SJoe Stringer if (!helper) { 1141cae3a262SJoe Stringer OVS_NLERR(log, "Unknown helper \"%s\"", name); 1142cae3a262SJoe Stringer return -EINVAL; 1143cae3a262SJoe Stringer } 1144cae3a262SJoe Stringer 1145cae3a262SJoe Stringer help = nf_ct_helper_ext_add(info->ct, helper, GFP_KERNEL); 1146cae3a262SJoe Stringer if (!help) { 1147d91fc59cSLiping Zhang nf_conntrack_helper_put(helper); 1148cae3a262SJoe Stringer return -ENOMEM; 1149cae3a262SJoe Stringer } 1150cae3a262SJoe Stringer 1151cae3a262SJoe Stringer rcu_assign_pointer(help->helper, helper); 1152cae3a262SJoe Stringer info->helper = helper; 1153cae3a262SJoe Stringer return 0; 1154cae3a262SJoe Stringer } 1155cae3a262SJoe Stringer 115605752523SJarno Rajahalme #ifdef CONFIG_NF_NAT_NEEDED 115705752523SJarno Rajahalme static int parse_nat(const struct nlattr *attr, 115805752523SJarno Rajahalme struct ovs_conntrack_info *info, bool log) 115905752523SJarno Rajahalme { 116005752523SJarno Rajahalme struct nlattr *a; 116105752523SJarno Rajahalme int rem; 116205752523SJarno Rajahalme bool have_ip_max = false; 116305752523SJarno Rajahalme bool have_proto_max = false; 116405752523SJarno Rajahalme bool ip_vers = (info->family == NFPROTO_IPV6); 116505752523SJarno Rajahalme 116605752523SJarno Rajahalme nla_for_each_nested(a, attr, rem) { 116705752523SJarno Rajahalme static const int ovs_nat_attr_lens[OVS_NAT_ATTR_MAX + 1][2] = { 116805752523SJarno Rajahalme [OVS_NAT_ATTR_SRC] = {0, 0}, 116905752523SJarno Rajahalme [OVS_NAT_ATTR_DST] = {0, 0}, 117005752523SJarno Rajahalme [OVS_NAT_ATTR_IP_MIN] = {sizeof(struct in_addr), 117105752523SJarno Rajahalme sizeof(struct in6_addr)}, 117205752523SJarno Rajahalme [OVS_NAT_ATTR_IP_MAX] = 
{sizeof(struct in_addr), 117305752523SJarno Rajahalme sizeof(struct in6_addr)}, 117405752523SJarno Rajahalme [OVS_NAT_ATTR_PROTO_MIN] = {sizeof(u16), sizeof(u16)}, 117505752523SJarno Rajahalme [OVS_NAT_ATTR_PROTO_MAX] = {sizeof(u16), sizeof(u16)}, 117605752523SJarno Rajahalme [OVS_NAT_ATTR_PERSISTENT] = {0, 0}, 117705752523SJarno Rajahalme [OVS_NAT_ATTR_PROTO_HASH] = {0, 0}, 117805752523SJarno Rajahalme [OVS_NAT_ATTR_PROTO_RANDOM] = {0, 0}, 117905752523SJarno Rajahalme }; 118005752523SJarno Rajahalme int type = nla_type(a); 118105752523SJarno Rajahalme 118205752523SJarno Rajahalme if (type > OVS_NAT_ATTR_MAX) { 118305752523SJarno Rajahalme OVS_NLERR(log, 118405752523SJarno Rajahalme "Unknown NAT attribute (type=%d, max=%d).\n", 118505752523SJarno Rajahalme type, OVS_NAT_ATTR_MAX); 118605752523SJarno Rajahalme return -EINVAL; 118705752523SJarno Rajahalme } 118805752523SJarno Rajahalme 118905752523SJarno Rajahalme if (nla_len(a) != ovs_nat_attr_lens[type][ip_vers]) { 119005752523SJarno Rajahalme OVS_NLERR(log, 119105752523SJarno Rajahalme "NAT attribute type %d has unexpected length (%d != %d).\n", 119205752523SJarno Rajahalme type, nla_len(a), 119305752523SJarno Rajahalme ovs_nat_attr_lens[type][ip_vers]); 119405752523SJarno Rajahalme return -EINVAL; 119505752523SJarno Rajahalme } 119605752523SJarno Rajahalme 119705752523SJarno Rajahalme switch (type) { 119805752523SJarno Rajahalme case OVS_NAT_ATTR_SRC: 119905752523SJarno Rajahalme case OVS_NAT_ATTR_DST: 120005752523SJarno Rajahalme if (info->nat) { 120105752523SJarno Rajahalme OVS_NLERR(log, 120205752523SJarno Rajahalme "Only one type of NAT may be specified.\n" 120305752523SJarno Rajahalme ); 120405752523SJarno Rajahalme return -ERANGE; 120505752523SJarno Rajahalme } 120605752523SJarno Rajahalme info->nat |= OVS_CT_NAT; 120705752523SJarno Rajahalme info->nat |= ((type == OVS_NAT_ATTR_SRC) 120805752523SJarno Rajahalme ? 
OVS_CT_SRC_NAT : OVS_CT_DST_NAT); 120905752523SJarno Rajahalme break; 121005752523SJarno Rajahalme 121105752523SJarno Rajahalme case OVS_NAT_ATTR_IP_MIN: 1212ac71b46eSHaishuang Yan nla_memcpy(&info->range.min_addr, a, 1213ac71b46eSHaishuang Yan sizeof(info->range.min_addr)); 121405752523SJarno Rajahalme info->range.flags |= NF_NAT_RANGE_MAP_IPS; 121505752523SJarno Rajahalme break; 121605752523SJarno Rajahalme 121705752523SJarno Rajahalme case OVS_NAT_ATTR_IP_MAX: 121805752523SJarno Rajahalme have_ip_max = true; 121905752523SJarno Rajahalme nla_memcpy(&info->range.max_addr, a, 122005752523SJarno Rajahalme sizeof(info->range.max_addr)); 122105752523SJarno Rajahalme info->range.flags |= NF_NAT_RANGE_MAP_IPS; 122205752523SJarno Rajahalme break; 122305752523SJarno Rajahalme 122405752523SJarno Rajahalme case OVS_NAT_ATTR_PROTO_MIN: 122505752523SJarno Rajahalme info->range.min_proto.all = htons(nla_get_u16(a)); 122605752523SJarno Rajahalme info->range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; 122705752523SJarno Rajahalme break; 122805752523SJarno Rajahalme 122905752523SJarno Rajahalme case OVS_NAT_ATTR_PROTO_MAX: 123005752523SJarno Rajahalme have_proto_max = true; 123105752523SJarno Rajahalme info->range.max_proto.all = htons(nla_get_u16(a)); 123205752523SJarno Rajahalme info->range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; 123305752523SJarno Rajahalme break; 123405752523SJarno Rajahalme 123505752523SJarno Rajahalme case OVS_NAT_ATTR_PERSISTENT: 123605752523SJarno Rajahalme info->range.flags |= NF_NAT_RANGE_PERSISTENT; 123705752523SJarno Rajahalme break; 123805752523SJarno Rajahalme 123905752523SJarno Rajahalme case OVS_NAT_ATTR_PROTO_HASH: 124005752523SJarno Rajahalme info->range.flags |= NF_NAT_RANGE_PROTO_RANDOM; 124105752523SJarno Rajahalme break; 124205752523SJarno Rajahalme 124305752523SJarno Rajahalme case OVS_NAT_ATTR_PROTO_RANDOM: 124405752523SJarno Rajahalme info->range.flags |= NF_NAT_RANGE_PROTO_RANDOM_FULLY; 124505752523SJarno Rajahalme break; 124605752523SJarno 
Rajahalme 124705752523SJarno Rajahalme default: 124805752523SJarno Rajahalme OVS_NLERR(log, "Unknown nat attribute (%d).\n", type); 124905752523SJarno Rajahalme return -EINVAL; 125005752523SJarno Rajahalme } 125105752523SJarno Rajahalme } 125205752523SJarno Rajahalme 125305752523SJarno Rajahalme if (rem > 0) { 125405752523SJarno Rajahalme OVS_NLERR(log, "NAT attribute has %d unknown bytes.\n", rem); 125505752523SJarno Rajahalme return -EINVAL; 125605752523SJarno Rajahalme } 125705752523SJarno Rajahalme if (!info->nat) { 125805752523SJarno Rajahalme /* Do not allow flags if no type is given. */ 125905752523SJarno Rajahalme if (info->range.flags) { 126005752523SJarno Rajahalme OVS_NLERR(log, 126105752523SJarno Rajahalme "NAT flags may be given only when NAT range (SRC or DST) is also specified.\n" 126205752523SJarno Rajahalme ); 126305752523SJarno Rajahalme return -EINVAL; 126405752523SJarno Rajahalme } 126505752523SJarno Rajahalme info->nat = OVS_CT_NAT; /* NAT existing connections. */ 126605752523SJarno Rajahalme } else if (!info->commit) { 126705752523SJarno Rajahalme OVS_NLERR(log, 126805752523SJarno Rajahalme "NAT attributes may be specified only when CT COMMIT flag is also specified.\n" 126905752523SJarno Rajahalme ); 127005752523SJarno Rajahalme return -EINVAL; 127105752523SJarno Rajahalme } 127205752523SJarno Rajahalme /* Allow missing IP_MAX. */ 127305752523SJarno Rajahalme if (info->range.flags & NF_NAT_RANGE_MAP_IPS && !have_ip_max) { 127405752523SJarno Rajahalme memcpy(&info->range.max_addr, &info->range.min_addr, 127505752523SJarno Rajahalme sizeof(info->range.max_addr)); 127605752523SJarno Rajahalme } 127705752523SJarno Rajahalme /* Allow missing PROTO_MAX. 
*/ 127805752523SJarno Rajahalme if (info->range.flags & NF_NAT_RANGE_PROTO_SPECIFIED && 127905752523SJarno Rajahalme !have_proto_max) { 128005752523SJarno Rajahalme info->range.max_proto.all = info->range.min_proto.all; 128105752523SJarno Rajahalme } 128205752523SJarno Rajahalme return 0; 128305752523SJarno Rajahalme } 128405752523SJarno Rajahalme #endif 128505752523SJarno Rajahalme 12867f8a436eSJoe Stringer static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = { 1287ab38a7b5SJoe Stringer [OVS_CT_ATTR_COMMIT] = { .minlen = 0, .maxlen = 0 }, 1288dd41d33fSJarno Rajahalme [OVS_CT_ATTR_FORCE_COMMIT] = { .minlen = 0, .maxlen = 0 }, 12897f8a436eSJoe Stringer [OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16), 12907f8a436eSJoe Stringer .maxlen = sizeof(u16) }, 1291182e3042SJoe Stringer [OVS_CT_ATTR_MARK] = { .minlen = sizeof(struct md_mark), 1292182e3042SJoe Stringer .maxlen = sizeof(struct md_mark) }, 129333db4125SJoe Stringer [OVS_CT_ATTR_LABELS] = { .minlen = sizeof(struct md_labels), 129433db4125SJoe Stringer .maxlen = sizeof(struct md_labels) }, 1295cae3a262SJoe Stringer [OVS_CT_ATTR_HELPER] = { .minlen = 1, 129605752523SJarno Rajahalme .maxlen = NF_CT_HELPER_NAME_LEN }, 129705752523SJarno Rajahalme #ifdef CONFIG_NF_NAT_NEEDED 129805752523SJarno Rajahalme /* NAT length is checked when parsing the nested attributes. 
*/ 129905752523SJarno Rajahalme [OVS_CT_ATTR_NAT] = { .minlen = 0, .maxlen = INT_MAX }, 130005752523SJarno Rajahalme #endif 130112064551SJarno Rajahalme [OVS_CT_ATTR_EVENTMASK] = { .minlen = sizeof(u32), 130212064551SJarno Rajahalme .maxlen = sizeof(u32) }, 13037f8a436eSJoe Stringer }; 13047f8a436eSJoe Stringer 13057f8a436eSJoe Stringer static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, 1306cae3a262SJoe Stringer const char **helper, bool log) 13077f8a436eSJoe Stringer { 13087f8a436eSJoe Stringer struct nlattr *a; 13097f8a436eSJoe Stringer int rem; 13107f8a436eSJoe Stringer 13117f8a436eSJoe Stringer nla_for_each_nested(a, attr, rem) { 13127f8a436eSJoe Stringer int type = nla_type(a); 13137f8a436eSJoe Stringer int maxlen = ovs_ct_attr_lens[type].maxlen; 13147f8a436eSJoe Stringer int minlen = ovs_ct_attr_lens[type].minlen; 13157f8a436eSJoe Stringer 13167f8a436eSJoe Stringer if (type > OVS_CT_ATTR_MAX) { 13177f8a436eSJoe Stringer OVS_NLERR(log, 13187f8a436eSJoe Stringer "Unknown conntrack attr (type=%d, max=%d)", 13197f8a436eSJoe Stringer type, OVS_CT_ATTR_MAX); 13207f8a436eSJoe Stringer return -EINVAL; 13217f8a436eSJoe Stringer } 13227f8a436eSJoe Stringer if (nla_len(a) < minlen || nla_len(a) > maxlen) { 13237f8a436eSJoe Stringer OVS_NLERR(log, 13247f8a436eSJoe Stringer "Conntrack attr type has unexpected length (type=%d, length=%d, expected=%d)", 13257f8a436eSJoe Stringer type, nla_len(a), maxlen); 13267f8a436eSJoe Stringer return -EINVAL; 13277f8a436eSJoe Stringer } 13287f8a436eSJoe Stringer 13297f8a436eSJoe Stringer switch (type) { 1330dd41d33fSJarno Rajahalme case OVS_CT_ATTR_FORCE_COMMIT: 1331dd41d33fSJarno Rajahalme info->force = true; 1332dd41d33fSJarno Rajahalme /* fall through. 
*/ 1333ab38a7b5SJoe Stringer case OVS_CT_ATTR_COMMIT: 1334ab38a7b5SJoe Stringer info->commit = true; 13357f8a436eSJoe Stringer break; 13367f8a436eSJoe Stringer #ifdef CONFIG_NF_CONNTRACK_ZONES 13377f8a436eSJoe Stringer case OVS_CT_ATTR_ZONE: 13387f8a436eSJoe Stringer info->zone.id = nla_get_u16(a); 13397f8a436eSJoe Stringer break; 13407f8a436eSJoe Stringer #endif 1341182e3042SJoe Stringer #ifdef CONFIG_NF_CONNTRACK_MARK 1342182e3042SJoe Stringer case OVS_CT_ATTR_MARK: { 1343182e3042SJoe Stringer struct md_mark *mark = nla_data(a); 1344182e3042SJoe Stringer 1345e754ec69SJoe Stringer if (!mark->mask) { 1346e754ec69SJoe Stringer OVS_NLERR(log, "ct_mark mask cannot be 0"); 1347e754ec69SJoe Stringer return -EINVAL; 1348e754ec69SJoe Stringer } 1349182e3042SJoe Stringer info->mark = *mark; 1350182e3042SJoe Stringer break; 1351182e3042SJoe Stringer } 1352182e3042SJoe Stringer #endif 1353c2ac6673SJoe Stringer #ifdef CONFIG_NF_CONNTRACK_LABELS 135433db4125SJoe Stringer case OVS_CT_ATTR_LABELS: { 135533db4125SJoe Stringer struct md_labels *labels = nla_data(a); 1356c2ac6673SJoe Stringer 1357e754ec69SJoe Stringer if (!labels_nonzero(&labels->mask)) { 1358e754ec69SJoe Stringer OVS_NLERR(log, "ct_labels mask cannot be 0"); 1359e754ec69SJoe Stringer return -EINVAL; 1360e754ec69SJoe Stringer } 136133db4125SJoe Stringer info->labels = *labels; 1362c2ac6673SJoe Stringer break; 1363c2ac6673SJoe Stringer } 1364c2ac6673SJoe Stringer #endif 1365cae3a262SJoe Stringer case OVS_CT_ATTR_HELPER: 1366cae3a262SJoe Stringer *helper = nla_data(a); 1367cae3a262SJoe Stringer if (!memchr(*helper, '\0', nla_len(a))) { 1368cae3a262SJoe Stringer OVS_NLERR(log, "Invalid conntrack helper"); 1369cae3a262SJoe Stringer return -EINVAL; 1370cae3a262SJoe Stringer } 1371cae3a262SJoe Stringer break; 137205752523SJarno Rajahalme #ifdef CONFIG_NF_NAT_NEEDED 137305752523SJarno Rajahalme case OVS_CT_ATTR_NAT: { 137405752523SJarno Rajahalme int err = parse_nat(a, info, log); 137505752523SJarno Rajahalme 
137605752523SJarno Rajahalme if (err) 137705752523SJarno Rajahalme return err; 137805752523SJarno Rajahalme break; 137905752523SJarno Rajahalme } 138005752523SJarno Rajahalme #endif 138112064551SJarno Rajahalme case OVS_CT_ATTR_EVENTMASK: 138212064551SJarno Rajahalme info->have_eventmask = true; 138312064551SJarno Rajahalme info->eventmask = nla_get_u32(a); 138412064551SJarno Rajahalme break; 138512064551SJarno Rajahalme 13867f8a436eSJoe Stringer default: 13877f8a436eSJoe Stringer OVS_NLERR(log, "Unknown conntrack attr (%d)", 13887f8a436eSJoe Stringer type); 13897f8a436eSJoe Stringer return -EINVAL; 13907f8a436eSJoe Stringer } 13917f8a436eSJoe Stringer } 13927f8a436eSJoe Stringer 13937d904c7bSJarno Rajahalme #ifdef CONFIG_NF_CONNTRACK_MARK 13947d904c7bSJarno Rajahalme if (!info->commit && info->mark.mask) { 13957d904c7bSJarno Rajahalme OVS_NLERR(log, 13967d904c7bSJarno Rajahalme "Setting conntrack mark requires 'commit' flag."); 13977d904c7bSJarno Rajahalme return -EINVAL; 13987d904c7bSJarno Rajahalme } 13997d904c7bSJarno Rajahalme #endif 14007d904c7bSJarno Rajahalme #ifdef CONFIG_NF_CONNTRACK_LABELS 14017d904c7bSJarno Rajahalme if (!info->commit && labels_nonzero(&info->labels.mask)) { 14027d904c7bSJarno Rajahalme OVS_NLERR(log, 14037d904c7bSJarno Rajahalme "Setting conntrack labels requires 'commit' flag."); 14047d904c7bSJarno Rajahalme return -EINVAL; 14057d904c7bSJarno Rajahalme } 14067d904c7bSJarno Rajahalme #endif 14077f8a436eSJoe Stringer if (rem > 0) { 14087f8a436eSJoe Stringer OVS_NLERR(log, "Conntrack attr has %d unknown bytes", rem); 14097f8a436eSJoe Stringer return -EINVAL; 14107f8a436eSJoe Stringer } 14117f8a436eSJoe Stringer 14127f8a436eSJoe Stringer return 0; 14137f8a436eSJoe Stringer } 14147f8a436eSJoe Stringer 1415c2ac6673SJoe Stringer bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr) 14167f8a436eSJoe Stringer { 14177f8a436eSJoe Stringer if (attr == OVS_KEY_ATTR_CT_STATE) 14187f8a436eSJoe Stringer return true; 14197f8a436eSJoe Stringer if 
(IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && 14207f8a436eSJoe Stringer attr == OVS_KEY_ATTR_CT_ZONE) 14217f8a436eSJoe Stringer return true; 1422182e3042SJoe Stringer if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && 1423182e3042SJoe Stringer attr == OVS_KEY_ATTR_CT_MARK) 1424182e3042SJoe Stringer return true; 1425c2ac6673SJoe Stringer if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && 142633db4125SJoe Stringer attr == OVS_KEY_ATTR_CT_LABELS) { 1427c2ac6673SJoe Stringer struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 1428c2ac6673SJoe Stringer 1429c2ac6673SJoe Stringer return ovs_net->xt_label; 1430c2ac6673SJoe Stringer } 14317f8a436eSJoe Stringer 14327f8a436eSJoe Stringer return false; 14337f8a436eSJoe Stringer } 14347f8a436eSJoe Stringer 14357f8a436eSJoe Stringer int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, 14367f8a436eSJoe Stringer const struct sw_flow_key *key, 14377f8a436eSJoe Stringer struct sw_flow_actions **sfa, bool log) 14387f8a436eSJoe Stringer { 14397f8a436eSJoe Stringer struct ovs_conntrack_info ct_info; 1440cae3a262SJoe Stringer const char *helper = NULL; 14417f8a436eSJoe Stringer u16 family; 14427f8a436eSJoe Stringer int err; 14437f8a436eSJoe Stringer 14447f8a436eSJoe Stringer family = key_to_nfproto(key); 14457f8a436eSJoe Stringer if (family == NFPROTO_UNSPEC) { 14467f8a436eSJoe Stringer OVS_NLERR(log, "ct family unspecified"); 14477f8a436eSJoe Stringer return -EINVAL; 14487f8a436eSJoe Stringer } 14497f8a436eSJoe Stringer 14507f8a436eSJoe Stringer memset(&ct_info, 0, sizeof(ct_info)); 14517f8a436eSJoe Stringer ct_info.family = family; 14527f8a436eSJoe Stringer 14537f8a436eSJoe Stringer nf_ct_zone_init(&ct_info.zone, NF_CT_DEFAULT_ZONE_ID, 14547f8a436eSJoe Stringer NF_CT_DEFAULT_ZONE_DIR, 0); 14557f8a436eSJoe Stringer 1456cae3a262SJoe Stringer err = parse_ct(attr, &ct_info, &helper, log); 14577f8a436eSJoe Stringer if (err) 14587f8a436eSJoe Stringer return err; 14597f8a436eSJoe Stringer 14607f8a436eSJoe Stringer /* Set up template for 
tracking connections in specific zones. */ 14617f8a436eSJoe Stringer ct_info.ct = nf_ct_tmpl_alloc(net, &ct_info.zone, GFP_KERNEL); 14627f8a436eSJoe Stringer if (!ct_info.ct) { 14637f8a436eSJoe Stringer OVS_NLERR(log, "Failed to allocate conntrack template"); 14647f8a436eSJoe Stringer return -ENOMEM; 14657f8a436eSJoe Stringer } 146690c7afc9SJoe Stringer 146790c7afc9SJoe Stringer __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status); 146890c7afc9SJoe Stringer nf_conntrack_get(&ct_info.ct->ct_general); 146990c7afc9SJoe Stringer 1470cae3a262SJoe Stringer if (helper) { 1471cae3a262SJoe Stringer err = ovs_ct_add_helper(&ct_info, helper, key, log); 1472cae3a262SJoe Stringer if (err) 1473cae3a262SJoe Stringer goto err_free_ct; 1474cae3a262SJoe Stringer } 14757f8a436eSJoe Stringer 14767f8a436eSJoe Stringer err = ovs_nla_add_action(sfa, OVS_ACTION_ATTR_CT, &ct_info, 14777f8a436eSJoe Stringer sizeof(ct_info), log); 14787f8a436eSJoe Stringer if (err) 14797f8a436eSJoe Stringer goto err_free_ct; 14807f8a436eSJoe Stringer 14817f8a436eSJoe Stringer return 0; 14827f8a436eSJoe Stringer err_free_ct: 14832f3ab9f9SJoe Stringer __ovs_ct_free_action(&ct_info); 14847f8a436eSJoe Stringer return err; 14857f8a436eSJoe Stringer } 14867f8a436eSJoe Stringer 148705752523SJarno Rajahalme #ifdef CONFIG_NF_NAT_NEEDED 148805752523SJarno Rajahalme static bool ovs_ct_nat_to_attr(const struct ovs_conntrack_info *info, 148905752523SJarno Rajahalme struct sk_buff *skb) 149005752523SJarno Rajahalme { 149105752523SJarno Rajahalme struct nlattr *start; 149205752523SJarno Rajahalme 149305752523SJarno Rajahalme start = nla_nest_start(skb, OVS_CT_ATTR_NAT); 149405752523SJarno Rajahalme if (!start) 149505752523SJarno Rajahalme return false; 149605752523SJarno Rajahalme 149705752523SJarno Rajahalme if (info->nat & OVS_CT_SRC_NAT) { 149805752523SJarno Rajahalme if (nla_put_flag(skb, OVS_NAT_ATTR_SRC)) 149905752523SJarno Rajahalme return false; 150005752523SJarno Rajahalme } else if (info->nat & OVS_CT_DST_NAT) { 
150105752523SJarno Rajahalme if (nla_put_flag(skb, OVS_NAT_ATTR_DST)) 150205752523SJarno Rajahalme return false; 150305752523SJarno Rajahalme } else { 150405752523SJarno Rajahalme goto out; 150505752523SJarno Rajahalme } 150605752523SJarno Rajahalme 150705752523SJarno Rajahalme if (info->range.flags & NF_NAT_RANGE_MAP_IPS) { 150899b7248eSArnd Bergmann if (IS_ENABLED(CONFIG_NF_NAT_IPV4) && 150999b7248eSArnd Bergmann info->family == NFPROTO_IPV4) { 151005752523SJarno Rajahalme if (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MIN, 151105752523SJarno Rajahalme info->range.min_addr.ip) || 151205752523SJarno Rajahalme (info->range.max_addr.ip 151305752523SJarno Rajahalme != info->range.min_addr.ip && 151405752523SJarno Rajahalme (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MAX, 151505752523SJarno Rajahalme info->range.max_addr.ip)))) 151605752523SJarno Rajahalme return false; 151799b7248eSArnd Bergmann } else if (IS_ENABLED(CONFIG_NF_NAT_IPV6) && 151899b7248eSArnd Bergmann info->family == NFPROTO_IPV6) { 151905752523SJarno Rajahalme if (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MIN, 152005752523SJarno Rajahalme &info->range.min_addr.in6) || 152105752523SJarno Rajahalme (memcmp(&info->range.max_addr.in6, 152205752523SJarno Rajahalme &info->range.min_addr.in6, 152305752523SJarno Rajahalme sizeof(info->range.max_addr.in6)) && 152405752523SJarno Rajahalme (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MAX, 152505752523SJarno Rajahalme &info->range.max_addr.in6)))) 152605752523SJarno Rajahalme return false; 152705752523SJarno Rajahalme } else { 152805752523SJarno Rajahalme return false; 152905752523SJarno Rajahalme } 153005752523SJarno Rajahalme } 153105752523SJarno Rajahalme if (info->range.flags & NF_NAT_RANGE_PROTO_SPECIFIED && 153205752523SJarno Rajahalme (nla_put_u16(skb, OVS_NAT_ATTR_PROTO_MIN, 153305752523SJarno Rajahalme ntohs(info->range.min_proto.all)) || 153405752523SJarno Rajahalme (info->range.max_proto.all != info->range.min_proto.all && 153505752523SJarno Rajahalme nla_put_u16(skb, 
OVS_NAT_ATTR_PROTO_MAX, 153605752523SJarno Rajahalme ntohs(info->range.max_proto.all))))) 153705752523SJarno Rajahalme return false; 153805752523SJarno Rajahalme 153905752523SJarno Rajahalme if (info->range.flags & NF_NAT_RANGE_PERSISTENT && 154005752523SJarno Rajahalme nla_put_flag(skb, OVS_NAT_ATTR_PERSISTENT)) 154105752523SJarno Rajahalme return false; 154205752523SJarno Rajahalme if (info->range.flags & NF_NAT_RANGE_PROTO_RANDOM && 154305752523SJarno Rajahalme nla_put_flag(skb, OVS_NAT_ATTR_PROTO_HASH)) 154405752523SJarno Rajahalme return false; 154505752523SJarno Rajahalme if (info->range.flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY && 154605752523SJarno Rajahalme nla_put_flag(skb, OVS_NAT_ATTR_PROTO_RANDOM)) 154705752523SJarno Rajahalme return false; 154805752523SJarno Rajahalme out: 154905752523SJarno Rajahalme nla_nest_end(skb, start); 155005752523SJarno Rajahalme 155105752523SJarno Rajahalme return true; 155205752523SJarno Rajahalme } 155305752523SJarno Rajahalme #endif 155405752523SJarno Rajahalme 15557f8a436eSJoe Stringer int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, 15567f8a436eSJoe Stringer struct sk_buff *skb) 15577f8a436eSJoe Stringer { 15587f8a436eSJoe Stringer struct nlattr *start; 15597f8a436eSJoe Stringer 15607f8a436eSJoe Stringer start = nla_nest_start(skb, OVS_ACTION_ATTR_CT); 15617f8a436eSJoe Stringer if (!start) 15627f8a436eSJoe Stringer return -EMSGSIZE; 15637f8a436eSJoe Stringer 1564dd41d33fSJarno Rajahalme if (ct_info->commit && nla_put_flag(skb, ct_info->force 1565dd41d33fSJarno Rajahalme ? 
OVS_CT_ATTR_FORCE_COMMIT 1566dd41d33fSJarno Rajahalme : OVS_CT_ATTR_COMMIT)) 15677f8a436eSJoe Stringer return -EMSGSIZE; 15687f8a436eSJoe Stringer if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && 15697f8a436eSJoe Stringer nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id)) 15707f8a436eSJoe Stringer return -EMSGSIZE; 1571e754ec69SJoe Stringer if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && ct_info->mark.mask && 1572182e3042SJoe Stringer nla_put(skb, OVS_CT_ATTR_MARK, sizeof(ct_info->mark), 1573182e3042SJoe Stringer &ct_info->mark)) 1574182e3042SJoe Stringer return -EMSGSIZE; 1575c2ac6673SJoe Stringer if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && 1576e754ec69SJoe Stringer labels_nonzero(&ct_info->labels.mask) && 157733db4125SJoe Stringer nla_put(skb, OVS_CT_ATTR_LABELS, sizeof(ct_info->labels), 157833db4125SJoe Stringer &ct_info->labels)) 1579c2ac6673SJoe Stringer return -EMSGSIZE; 1580cae3a262SJoe Stringer if (ct_info->helper) { 1581cae3a262SJoe Stringer if (nla_put_string(skb, OVS_CT_ATTR_HELPER, 1582cae3a262SJoe Stringer ct_info->helper->name)) 1583cae3a262SJoe Stringer return -EMSGSIZE; 1584cae3a262SJoe Stringer } 158512064551SJarno Rajahalme if (ct_info->have_eventmask && 158612064551SJarno Rajahalme nla_put_u32(skb, OVS_CT_ATTR_EVENTMASK, ct_info->eventmask)) 158712064551SJarno Rajahalme return -EMSGSIZE; 158812064551SJarno Rajahalme 158905752523SJarno Rajahalme #ifdef CONFIG_NF_NAT_NEEDED 159005752523SJarno Rajahalme if (ct_info->nat && !ovs_ct_nat_to_attr(ct_info, skb)) 159105752523SJarno Rajahalme return -EMSGSIZE; 159205752523SJarno Rajahalme #endif 15937f8a436eSJoe Stringer nla_nest_end(skb, start); 15947f8a436eSJoe Stringer 15957f8a436eSJoe Stringer return 0; 15967f8a436eSJoe Stringer } 15977f8a436eSJoe Stringer 15987f8a436eSJoe Stringer void ovs_ct_free_action(const struct nlattr *a) 15997f8a436eSJoe Stringer { 16007f8a436eSJoe Stringer struct ovs_conntrack_info *ct_info = nla_data(a); 16017f8a436eSJoe Stringer 16022f3ab9f9SJoe Stringer 
__ovs_ct_free_action(ct_info); 16032f3ab9f9SJoe Stringer } 16042f3ab9f9SJoe Stringer 16052f3ab9f9SJoe Stringer static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info) 16062f3ab9f9SJoe Stringer { 1607cae3a262SJoe Stringer if (ct_info->helper) 1608d91fc59cSLiping Zhang nf_conntrack_helper_put(ct_info->helper); 16097f8a436eSJoe Stringer if (ct_info->ct) 161076644232SJoe Stringer nf_ct_tmpl_free(ct_info->ct); 16117f8a436eSJoe Stringer } 1612c2ac6673SJoe Stringer 1613c2ac6673SJoe Stringer void ovs_ct_init(struct net *net) 1614c2ac6673SJoe Stringer { 161533db4125SJoe Stringer unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE; 1616c2ac6673SJoe Stringer struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 1617c2ac6673SJoe Stringer 1618adff6c65SFlorian Westphal if (nf_connlabels_get(net, n_bits - 1)) { 1619c2ac6673SJoe Stringer ovs_net->xt_label = false; 1620c2ac6673SJoe Stringer OVS_NLERR(true, "Failed to set connlabel length"); 1621c2ac6673SJoe Stringer } else { 1622c2ac6673SJoe Stringer ovs_net->xt_label = true; 1623c2ac6673SJoe Stringer } 1624c2ac6673SJoe Stringer } 1625c2ac6673SJoe Stringer 1626c2ac6673SJoe Stringer void ovs_ct_exit(struct net *net) 1627c2ac6673SJoe Stringer { 1628c2ac6673SJoe Stringer struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 1629c2ac6673SJoe Stringer 1630c2ac6673SJoe Stringer if (ovs_net->xt_label) 1631c2ac6673SJoe Stringer nf_connlabels_put(net); 1632c2ac6673SJoe Stringer } 1633