1 /* 2 * ip_vs_nfct.c: Netfilter connection tracking support for IPVS 3 * 4 * Portions Copyright (C) 2001-2002 5 * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland. 6 * 7 * Portions Copyright (C) 2003-2010 8 * Julian Anastasov 9 * 10 * 11 * This code is free software; you can redistribute it and/or modify 12 * it under the terms of the GNU General Public License as published by 13 * the Free Software Foundation; either version 2 of the License, or 14 * (at your option) any later version. 15 * 16 * This program is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 * GNU General Public License for more details. 20 * 21 * You should have received a copy of the GNU General Public License 22 * along with this program; if not, see <http://www.gnu.org/licenses/>. 23 * 24 * 25 * Authors: 26 * Ben North <ben@redfrontdoor.org> 27 * Julian Anastasov <ja@ssi.bg> Reorganize and sync with latest kernels 28 * Hannes Eder <heder@google.com> Extend NFCT support for FTP, ipvs match 29 * 30 * 31 * Current status: 32 * 33 * - provide conntrack confirmation for new and related connections, by 34 * this way we can see their proper conntrack state in all hooks 35 * - support for all forwarding methods, not only NAT 36 * - FTP support (NAT), ability to support other NAT apps with expectations 37 * - to correctly create expectations for related NAT connections the proper 38 * NF conntrack support must be already installed, eg. ip_vs_ftp requires 39 * nf_conntrack_ftp ... iptables_nat for the same ports (but no iptables 40 * NAT rules are needed) 41 * - alter reply for NAT when forwarding packet in original direction: 42 * conntrack from client in NEW or RELATED (Passive FTP DATA) state or 43 * when RELATED conntrack is created from real server (Active FTP DATA) 44 * - if iptables_nat is not loaded the Passive FTP will not work (the 45 * PASV response can not be NAT-ed) but Active FTP should work 46 * 47 */ 48 49 #define KMSG_COMPONENT "IPVS" 50 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 51 52 #include <linux/module.h> 53 #include <linux/types.h> 54 #include <linux/kernel.h> 55 #include <linux/errno.h> 56 #include <linux/compiler.h> 57 #include <linux/vmalloc.h> 58 #include <linux/skbuff.h> 59 #include <net/ip.h> 60 #include <linux/netfilter.h> 61 #include <linux/netfilter_ipv4.h> 62 #include <net/ip_vs.h> 63 #include <net/netfilter/nf_conntrack_core.h> 64 #include <net/netfilter/nf_conntrack_expect.h> 65 #include <net/netfilter/nf_conntrack_seqadj.h> 66 #include <net/netfilter/nf_conntrack_helper.h> 67 #include <net/netfilter/nf_conntrack_zones.h> 68 69 70 #define FMT_TUPLE "%pI4:%u->%pI4:%u/%u" 71 #define ARG_TUPLE(T) &(T)->src.u3.ip, ntohs((T)->src.u.all), \ 72 &(T)->dst.u3.ip, ntohs((T)->dst.u.all), \ 73 (T)->dst.protonum 74 75 #define FMT_CONN "%pI4:%u->%pI4:%u->%pI4:%u/%u:%u" 76 #define ARG_CONN(C) &((C)->caddr.ip), ntohs((C)->cport), \ 77 &((C)->vaddr.ip), ntohs((C)->vport), \ 78 &((C)->daddr.ip), ntohs((C)->dport), \ 79 (C)->protocol, (C)->state 80 81 void 82 ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin) 83 { 84 enum ip_conntrack_info ctinfo; 85 struct nf_conn *ct = nf_ct_get(skb, &ctinfo); 86 struct nf_conntrack_tuple new_tuple; 87 88 if (ct == NULL || nf_ct_is_confirmed(ct) || nf_ct_is_untracked(ct) || 89 nf_ct_is_dying(ct)) 90 return; 91 92 /* Never alter conntrack for non-NAT conns */ 93 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) 94 return; 95 96 /* Alter reply only in original direction */ 97 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) 98 return; 99 100 /* Applications may adjust TCP seqs */ 101 if (cp->app && nf_ct_protonum(ct) == IPPROTO_TCP && 102 !nfct_seqadj(ct) && !nfct_seqadj_ext_add(ct)) 103 return; 104 105 /* 106 * The connection is not yet in the hashtable, so we update it. 107 * CIP->VIP will remain the same, so leave the tuple in 108 * IP_CT_DIR_ORIGINAL untouched. When the reply comes back from the 109 * real-server we will see RIP->DIP. 110 */ 111 new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; 112 /* 113 * This will also take care of UDP and other protocols. 114 */ 115 if (outin) { 116 new_tuple.src.u3 = cp->daddr; 117 if (new_tuple.dst.protonum != IPPROTO_ICMP && 118 new_tuple.dst.protonum != IPPROTO_ICMPV6) 119 new_tuple.src.u.tcp.port = cp->dport; 120 } else { 121 new_tuple.dst.u3 = cp->vaddr; 122 if (new_tuple.dst.protonum != IPPROTO_ICMP && 123 new_tuple.dst.protonum != IPPROTO_ICMPV6) 124 new_tuple.dst.u.tcp.port = cp->vport; 125 } 126 IP_VS_DBG(7, "%s: Updating conntrack ct=%p, status=0x%lX, " 127 "ctinfo=%d, old reply=" FMT_TUPLE 128 ", new reply=" FMT_TUPLE ", cp=" FMT_CONN "\n", 129 __func__, ct, ct->status, ctinfo, 130 ARG_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple), 131 ARG_TUPLE(&new_tuple), ARG_CONN(cp)); 132 nf_conntrack_alter_reply(ct, &new_tuple); 133 } 134 135 int ip_vs_confirm_conntrack(struct sk_buff *skb) 136 { 137 return nf_conntrack_confirm(skb); 138 } 139 140 /* 141 * Called from init_conntrack() as expectfn handler. 142 */ 143 static void ip_vs_nfct_expect_callback(struct nf_conn *ct, 144 struct nf_conntrack_expect *exp) 145 { 146 struct nf_conntrack_tuple *orig, new_reply; 147 struct ip_vs_conn *cp; 148 struct ip_vs_conn_param p; 149 struct net *net = nf_ct_net(ct); 150 151 if (exp->tuple.src.l3num != PF_INET) 152 return; 153 154 /* 155 * We assume that no NF locks are held before this callback. 156 * ip_vs_conn_out_get and ip_vs_conn_in_get should match their 157 * expectations even if they use wildcard values, now we provide the 158 * actual values from the newly created original conntrack direction. 159 * The conntrack is confirmed when packet reaches IPVS hooks. 160 */ 161 162 /* RS->CLIENT */ 163 orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; 164 ip_vs_conn_fill_param(net_ipvs(net), exp->tuple.src.l3num, orig->dst.protonum, 165 &orig->src.u3, orig->src.u.tcp.port, 166 &orig->dst.u3, orig->dst.u.tcp.port, &p); 167 cp = ip_vs_conn_out_get(&p); 168 if (cp) { 169 /* Change reply CLIENT->RS to CLIENT->VS */ 170 new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; 171 IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " 172 FMT_TUPLE ", found inout cp=" FMT_CONN "\n", 173 __func__, ct, ct->status, 174 ARG_TUPLE(orig), ARG_TUPLE(&new_reply), 175 ARG_CONN(cp)); 176 new_reply.dst.u3 = cp->vaddr; 177 new_reply.dst.u.tcp.port = cp->vport; 178 IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE 179 ", inout cp=" FMT_CONN "\n", 180 __func__, ct, 181 ARG_TUPLE(orig), ARG_TUPLE(&new_reply), 182 ARG_CONN(cp)); 183 goto alter; 184 } 185 186 /* CLIENT->VS */ 187 cp = ip_vs_conn_in_get(&p); 188 if (cp) { 189 /* Change reply VS->CLIENT to RS->CLIENT */ 190 new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; 191 IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " 192 FMT_TUPLE ", found outin cp=" FMT_CONN "\n", 193 __func__, ct, ct->status, 194 ARG_TUPLE(orig), ARG_TUPLE(&new_reply), 195 ARG_CONN(cp)); 196 new_reply.src.u3 = cp->daddr; 197 new_reply.src.u.tcp.port = cp->dport; 198 IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " 199 FMT_TUPLE ", outin cp=" FMT_CONN "\n", 200 __func__, ct, 201 ARG_TUPLE(orig), ARG_TUPLE(&new_reply), 202 ARG_CONN(cp)); 203 goto alter; 204 } 205 206 IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE 207 " - unknown expect\n", 208 __func__, ct, ct->status, ARG_TUPLE(orig)); 209 return; 210 211 alter: 212 /* Never alter conntrack for non-NAT conns */ 213 if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ) 214 nf_conntrack_alter_reply(ct, &new_reply); 215 ip_vs_conn_put(cp); 216 return; 217 } 218 219 /* 220 * Create NF conntrack expectation with wildcard (optional) source port. 221 * Then the default callback function will alter the reply and will confirm 222 * the conntrack entry when the first packet comes. 223 * Use port 0 to expect connection from any port. 224 */ 225 void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct, 226 struct ip_vs_conn *cp, u_int8_t proto, 227 const __be16 port, int from_rs) 228 { 229 struct nf_conntrack_expect *exp; 230 231 if (ct == NULL || nf_ct_is_untracked(ct)) 232 return; 233 234 exp = nf_ct_expect_alloc(ct); 235 if (!exp) 236 return; 237 238 nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct), 239 from_rs ? &cp->daddr : &cp->caddr, 240 from_rs ? &cp->caddr : &cp->vaddr, 241 proto, port ? &port : NULL, 242 from_rs ? &cp->cport : &cp->vport); 243 244 exp->expectfn = ip_vs_nfct_expect_callback; 245 246 IP_VS_DBG(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n", 247 __func__, ct, ARG_TUPLE(&exp->tuple)); 248 nf_ct_expect_related(exp); 249 nf_ct_expect_put(exp); 250 } 251 EXPORT_SYMBOL(ip_vs_nfct_expect_related); 252 253 /* 254 * Our connection was terminated, try to drop the conntrack immediately 255 */ 256 void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) 257 { 258 struct nf_conntrack_tuple_hash *h; 259 struct nf_conn *ct; 260 struct nf_conntrack_tuple tuple; 261 262 if (!cp->cport) 263 return; 264 265 tuple = (struct nf_conntrack_tuple) { 266 .dst = { .protonum = cp->protocol, .dir = IP_CT_DIR_ORIGINAL } }; 267 tuple.src.u3 = cp->caddr; 268 tuple.src.u.all = cp->cport; 269 tuple.src.l3num = cp->af; 270 tuple.dst.u3 = cp->vaddr; 271 tuple.dst.u.all = cp->vport; 272 273 IP_VS_DBG(7, "%s: dropping conntrack with tuple=" FMT_TUPLE 274 " for conn " FMT_CONN "\n", 275 __func__, ARG_TUPLE(&tuple), ARG_CONN(cp)); 276 277 h = nf_conntrack_find_get(cp->ipvs->net, &nf_ct_zone_dflt, &tuple); 278 if (h) { 279 ct = nf_ct_tuplehash_to_ctrack(h); 280 /* Show what happens instead of calling nf_ct_kill() */ 281 if (del_timer(&ct->timeout)) { 282 IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple=" 283 FMT_TUPLE "\n", 284 __func__, ct, ARG_TUPLE(&tuple)); 285 if (ct->timeout.function) 286 ct->timeout.function(ct->timeout.data); 287 } else { 288 IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple=" 289 FMT_TUPLE "\n", 290 __func__, ct, ARG_TUPLE(&tuple)); 291 } 292 nf_ct_put(ct); 293 } else { 294 IP_VS_DBG(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n", 295 __func__, ARG_TUPLE(&tuple)); 296 } 297 } 298 299