/*
 * Stateless NAT actions
 *
 * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 */

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/tc_act/tc_nat.h>
#include <net/act_api.h>
#include <net/icmp.h>
#include <net/ip.h>
#include <net/netlink.h>
#include <net/tc_act/tc_nat.h>
#include <net/tcp.h>
#include <net/udp.h>


#define NAT_TAB_MASK 15
static struct tcf_common *tcf_nat_ht[NAT_TAB_MASK + 1];
static u32 nat_idx_gen;
static DEFINE_RWLOCK(nat_lock);

static struct tcf_hashinfo nat_hash_info = {
	.htab = tcf_nat_ht,
	.hmask = NAT_TAB_MASK,
	.lock = &nat_lock,
};

static int tcf_nat_init(struct rtattr *rta, struct rtattr *est,
			struct tc_action *a, int ovr, int bind)
{
	struct rtattr *tb[TCA_NAT_MAX];
	struct tc_nat *parm;
	int ret = 0;
	struct tcf_nat *p;
	struct tcf_common *pc;

	if (rta == NULL || rtattr_parse_nested(tb, TCA_NAT_MAX, rta) < 0)
		return -EINVAL;

	if (tb[TCA_NAT_PARMS - 1] == NULL ||
	    RTA_PAYLOAD(tb[TCA_NAT_PARMS - 1]) < sizeof(*parm))
		return -EINVAL;
	parm = RTA_DATA(tb[TCA_NAT_PARMS - 1]);

	pc = tcf_hash_check(parm->index, a, bind, &nat_hash_info);
	if (!pc) {
		pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
				     &nat_idx_gen, &nat_hash_info);
		if (unlikely(!pc))
			return -ENOMEM;
		p = to_tcf_nat(pc);
		ret = ACT_P_CREATED;
	} else {
		p = to_tcf_nat(pc);
		if (!ovr) {
			tcf_hash_release(pc, bind, &nat_hash_info);
			return -EEXIST;
		}
	}

	spin_lock_bh(&p->tcf_lock);
	p->old_addr = parm->old_addr;
	p->new_addr = parm->new_addr;
	p->mask = parm->mask;
	p->flags = parm->flags;

	p->tcf_action = parm->action;
	spin_unlock_bh(&p->tcf_lock);

	if (ret == ACT_P_CREATED)
		tcf_hash_insert(pc, &nat_hash_info);

	return ret;
}

static int tcf_nat_cleanup(struct tc_action *a, int bind)
{
	struct tcf_nat *p = a->priv;

	return tcf_hash_release(&p->common, bind, &nat_hash_info);
}

static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
		   struct tcf_result *res)
{
	struct tcf_nat *p = a->priv;
	struct iphdr *iph;
	__be32 old_addr;
	__be32 new_addr;
	__be32 mask;
	__be32 addr;
	int egress;
	int action;
	int ihl;

	spin_lock(&p->tcf_lock);

	p->tcf_tm.lastuse = jiffies;
	old_addr = p->old_addr;
	new_addr = p->new_addr;
	mask = p->mask;
	egress = p->flags & TCA_NAT_FLAG_EGRESS;
	action = p->tcf_action;

	p->tcf_bstats.bytes += skb->len;
	p->tcf_bstats.packets++;

	spin_unlock(&p->tcf_lock);

	if (unlikely(action == TC_ACT_SHOT))
		goto drop;

	if (!pskb_may_pull(skb, sizeof(*iph)))
		goto drop;

	iph = ip_hdr(skb);

	if (egress)
		addr = iph->saddr;
	else
		addr = iph->daddr;

	if (!((old_addr ^ addr) & mask)) {
		if (skb_cloned(skb) &&
		    !skb_clone_writable(skb, sizeof(*iph)) &&
		    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
			goto drop;

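		/*
		 * Merge the configured replacement prefix with the host
		 * bits of the original address: e.g. old 10.0.0.0, new
		 * 192.168.1.0, mask 255.255.255.0 rewrites 10.0.0.5 to
		 * 192.168.1.5.
		 */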
		new_addr &= mask;
		new_addr |= addr & ~mask;

		/* Rewrite IP header */
		iph = ip_hdr(skb);
		if (egress)
			iph->saddr = new_addr;
		else
			iph->daddr = new_addr;

		nf_csum_replace4(&iph->check, addr, new_addr);
	} else if ((iph->frag_off & htons(IP_OFFSET)) ||
		   iph->protocol != IPPROTO_ICMP) {
		/*
		 * The address does not match: only an ICMP error can
		 * still carry the pre-NAT address in its embedded
		 * header, so leave everything else untouched.
		 */
		goto out;
	}

	ihl = iph->ihl * 4;

	/* It would be nice to share code with stateful NAT. */
	switch (iph->frag_off & htons(IP_OFFSET) ? 0 : iph->protocol) {
	case IPPROTO_TCP:
	{
		struct tcphdr *tcph;

		if (!pskb_may_pull(skb, ihl + sizeof(*tcph)) ||
		    (skb_cloned(skb) &&
		     !skb_clone_writable(skb, ihl + sizeof(*tcph)) &&
		     pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
			goto drop;

		tcph = (void *)(skb_network_header(skb) + ihl);
		nf_proto_csum_replace4(&tcph->check, skb, addr, new_addr, 1);
		break;
	}
	case IPPROTO_UDP:
	{
		struct udphdr *udph;

		if (!pskb_may_pull(skb, ihl + sizeof(*udph)) ||
		    (skb_cloned(skb) &&
		     !skb_clone_writable(skb, ihl + sizeof(*udph)) &&
		     pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
			goto drop;

		udph = (void *)(skb_network_header(skb) + ihl);
		if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
			nf_proto_csum_replace4(&udph->check, skb, addr,
					       new_addr, 1);
			if (!udph->check)
				udph->check = CSUM_MANGLED_0;
		}
		break;
	}
	case IPPROTO_ICMP:
	{
		struct icmphdr *icmph;

		if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph)))
			goto drop;

		icmph = (void *)(skb_network_header(skb) + ihl);

		if ((icmph->type != ICMP_DEST_UNREACH) &&
		    (icmph->type != ICMP_TIME_EXCEEDED) &&
		    (icmph->type != ICMP_PARAMETERPROB))
			break;

		iph = (void *)(icmph + 1);
		if (egress)
			addr = iph->daddr;
		else
			addr = iph->saddr;

		if ((old_addr ^ addr) & mask)
			break;

		if (skb_cloned(skb) &&
		    !skb_clone_writable(skb,
					ihl + sizeof(*icmph) + sizeof(*iph)) &&
		    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
			goto drop;

		icmph = (void *)(skb_network_header(skb) + ihl);
		iph = (void *)(icmph + 1);

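		/*
		 * The datagram quoted by the ICMP error travelled in the
		 * opposite direction, so the address roles are reversed:
		 * egress rewrites the inner daddr, ingress the inner
		 * saddr.
		 */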
		new_addr &= mask;
		new_addr |= addr & ~mask;

		/* XXX Fix up the inner checksums. */
		if (egress)
			iph->daddr = new_addr;
		else
			iph->saddr = new_addr;

		/* The ICMP checksum does not cover a pseudo-header. */
		nf_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr,
				       0);
		break;
	}
	default:
		break;
	}

out:
	return action;

drop:
	spin_lock(&p->tcf_lock);
	p->tcf_qstats.drops++;
	spin_unlock(&p->tcf_lock);
	return TC_ACT_SHOT;
}

static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a,
			int bind, int ref)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tcf_nat *p = a->priv;
	struct tc_nat *opt;
	struct tcf_t t;
	int s;

	s = sizeof(*opt);

	/* netlink spinlocks held above us - must use ATOMIC */
	opt = kzalloc(s, GFP_ATOMIC);
	if (unlikely(!opt))
		return -ENOBUFS;

	opt->old_addr = p->old_addr;
	opt->new_addr = p->new_addr;
	opt->mask = p->mask;
	opt->flags = p->flags;

	opt->index = p->tcf_index;
	opt->action = p->tcf_action;
	opt->refcnt = p->tcf_refcnt - ref;
	opt->bindcnt = p->tcf_bindcnt - bind;

	RTA_PUT(skb, TCA_NAT_PARMS, s, opt);
	t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
	t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
	t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
	RTA_PUT(skb, TCA_NAT_TM, sizeof(t), &t);

	kfree(opt);

	return skb->len;

rtattr_failure:
	nlmsg_trim(skb, b);
	kfree(opt);
	return -1;
}

static struct tc_action_ops act_nat_ops = {
	.kind = "nat",
	.hinfo = &nat_hash_info,
	.type = TCA_ACT_NAT,
	.capab = TCA_CAP_NONE,
	.owner = THIS_MODULE,
	.act = tcf_nat,
	.dump = tcf_nat_dump,
	.cleanup = tcf_nat_cleanup,
	.lookup = tcf_hash_search,
	.init = tcf_nat_init,
	.walk = tcf_generic_walker
};

MODULE_DESCRIPTION("Stateless NAT actions");
MODULE_LICENSE("GPL");

static int __init nat_init_module(void)
{
	return tcf_register_action(&act_nat_ops);
}

static void __exit nat_cleanup_module(void)
{
	tcf_unregister_action(&act_nat_ops);
}

module_init(nat_init_module);
module_exit(nat_cleanup_module);
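
/*
 * Usage sketch (assumption: iproute2's nat action takes
 * "nat ingress|egress OLD NEW"; the device, handles and addresses
 * below are illustrative, not from this file):
 *
 *	tc qdisc add dev eth0 ingress
 *	tc filter add dev eth0 parent ffff: protocol ip prio 10 u32 \
 *		match ip dst 192.0.2.1/32 flowid 1:1 \
 *		action nat ingress 192.0.2.1/32 10.0.0.1
 *
 * This rewrites the destination address of packets to 192.0.2.1
 * arriving on eth0 to 10.0.0.1; egress mode rewrites the source
 * address instead.
 */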