1 /* 2 * Stateless NAT actions 3 * 4 * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au> 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License as published by the Free 8 * Software Foundation; either version 2 of the License, or (at your option) 9 * any later version. 10 */ 11 12 #include <linux/errno.h> 13 #include <linux/init.h> 14 #include <linux/kernel.h> 15 #include <linux/module.h> 16 #include <linux/netfilter.h> 17 #include <linux/rtnetlink.h> 18 #include <linux/skbuff.h> 19 #include <linux/slab.h> 20 #include <linux/spinlock.h> 21 #include <linux/string.h> 22 #include <linux/tc_act/tc_nat.h> 23 #include <net/act_api.h> 24 #include <net/icmp.h> 25 #include <net/ip.h> 26 #include <net/netlink.h> 27 #include <net/tc_act/tc_nat.h> 28 #include <net/tcp.h> 29 #include <net/udp.h> 30 31 32 #define NAT_TAB_MASK 15 33 static struct tcf_common *tcf_nat_ht[NAT_TAB_MASK + 1]; 34 static u32 nat_idx_gen; 35 static DEFINE_RWLOCK(nat_lock); 36 37 static struct tcf_hashinfo nat_hash_info = { 38 .htab = tcf_nat_ht, 39 .hmask = NAT_TAB_MASK, 40 .lock = &nat_lock, 41 }; 42 43 static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = { 44 [TCA_NAT_PARMS] = { .len = sizeof(struct tc_nat) }, 45 }; 46 47 static int tcf_nat_init(struct nlattr *nla, struct nlattr *est, 48 struct tc_action *a, int ovr, int bind) 49 { 50 struct nlattr *tb[TCA_NAT_MAX + 1]; 51 struct tc_nat *parm; 52 int ret = 0, err; 53 struct tcf_nat *p; 54 struct tcf_common *pc; 55 56 if (nla == NULL) 57 return -EINVAL; 58 59 err = nla_parse_nested(tb, TCA_NAT_MAX, nla, nat_policy); 60 if (err < 0) 61 return err; 62 63 if (tb[TCA_NAT_PARMS] == NULL) 64 return -EINVAL; 65 parm = nla_data(tb[TCA_NAT_PARMS]); 66 67 pc = tcf_hash_check(parm->index, a, bind, &nat_hash_info); 68 if (!pc) { 69 pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, 70 &nat_idx_gen, &nat_hash_info); 71 if (IS_ERR(pc)) 72 return PTR_ERR(pc); 73 p = to_tcf_nat(pc); 74 ret = ACT_P_CREATED; 75 } else { 76 p = to_tcf_nat(pc); 77 if (!ovr) { 78 tcf_hash_release(pc, bind, &nat_hash_info); 79 return -EEXIST; 80 } 81 } 82 83 spin_lock_bh(&p->tcf_lock); 84 p->old_addr = parm->old_addr; 85 p->new_addr = parm->new_addr; 86 p->mask = parm->mask; 87 p->flags = parm->flags; 88 89 p->tcf_action = parm->action; 90 spin_unlock_bh(&p->tcf_lock); 91 92 if (ret == ACT_P_CREATED) 93 tcf_hash_insert(pc, &nat_hash_info); 94 95 return ret; 96 } 97 98 static int tcf_nat_cleanup(struct tc_action *a, int bind) 99 { 100 struct tcf_nat *p = a->priv; 101 102 return tcf_hash_release(&p->common, bind, &nat_hash_info); 103 } 104 105 static int tcf_nat(struct sk_buff *skb, struct tc_action *a, 106 struct tcf_result *res) 107 { 108 struct tcf_nat *p = a->priv; 109 struct iphdr *iph; 110 __be32 old_addr; 111 __be32 new_addr; 112 __be32 mask; 113 __be32 addr; 114 int egress; 115 int action; 116 int ihl; 117 int noff; 118 119 spin_lock(&p->tcf_lock); 120 121 p->tcf_tm.lastuse = jiffies; 122 old_addr = p->old_addr; 123 new_addr = p->new_addr; 124 mask = p->mask; 125 egress = p->flags & TCA_NAT_FLAG_EGRESS; 126 action = p->tcf_action; 127 128 p->tcf_bstats.bytes += qdisc_pkt_len(skb); 129 p->tcf_bstats.packets++; 130 131 spin_unlock(&p->tcf_lock); 132 133 if (unlikely(action == TC_ACT_SHOT)) 134 goto drop; 135 136 noff = skb_network_offset(skb); 137 if (!pskb_may_pull(skb, sizeof(*iph) + noff)) 138 goto drop; 139 140 iph = ip_hdr(skb); 141 142 if (egress) 143 addr = iph->saddr; 144 else 145 addr = iph->daddr; 146 147 if (!((old_addr ^ addr) & mask)) { 148 if (skb_cloned(skb) && 149 !skb_clone_writable(skb, sizeof(*iph) + noff) && 150 pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) 151 goto drop; 152 153 new_addr &= mask; 154 new_addr |= addr & ~mask; 155 156 /* Rewrite IP header */ 157 iph = ip_hdr(skb); 158 if (egress) 159 iph->saddr = new_addr; 160 else 161 iph->daddr = new_addr; 162 163 csum_replace4(&iph->check, addr, new_addr); 164 } else if ((iph->frag_off & htons(IP_OFFSET)) || 165 iph->protocol != IPPROTO_ICMP) { 166 goto out; 167 } 168 169 ihl = iph->ihl * 4; 170 171 /* It would be nice to share code with stateful NAT. */ 172 switch (iph->frag_off & htons(IP_OFFSET) ? 0 : iph->protocol) { 173 case IPPROTO_TCP: 174 { 175 struct tcphdr *tcph; 176 177 if (!pskb_may_pull(skb, ihl + sizeof(*tcph) + noff) || 178 (skb_cloned(skb) && 179 !skb_clone_writable(skb, ihl + sizeof(*tcph) + noff) && 180 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) 181 goto drop; 182 183 tcph = (void *)(skb_network_header(skb) + ihl); 184 inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, 1); 185 break; 186 } 187 case IPPROTO_UDP: 188 { 189 struct udphdr *udph; 190 191 if (!pskb_may_pull(skb, ihl + sizeof(*udph) + noff) || 192 (skb_cloned(skb) && 193 !skb_clone_writable(skb, ihl + sizeof(*udph) + noff) && 194 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) 195 goto drop; 196 197 udph = (void *)(skb_network_header(skb) + ihl); 198 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { 199 inet_proto_csum_replace4(&udph->check, skb, addr, 200 new_addr, 1); 201 if (!udph->check) 202 udph->check = CSUM_MANGLED_0; 203 } 204 break; 205 } 206 case IPPROTO_ICMP: 207 { 208 struct icmphdr *icmph; 209 210 if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + noff)) 211 goto drop; 212 213 icmph = (void *)(skb_network_header(skb) + ihl); 214 215 if ((icmph->type != ICMP_DEST_UNREACH) && 216 (icmph->type != ICMP_TIME_EXCEEDED) && 217 (icmph->type != ICMP_PARAMETERPROB)) 218 break; 219 220 if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph) + 221 noff)) 222 goto drop; 223 224 icmph = (void *)(skb_network_header(skb) + ihl); 225 iph = (void *)(icmph + 1); 226 if (egress) 227 addr = iph->daddr; 228 else 229 addr = iph->saddr; 230 231 if ((old_addr ^ addr) & mask) 232 break; 233 234 if (skb_cloned(skb) && 235 !skb_clone_writable(skb, ihl + sizeof(*icmph) + 236 sizeof(*iph) + noff) && 237 pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) 238 goto drop; 239 240 icmph = (void *)(skb_network_header(skb) + ihl); 241 iph = (void *)(icmph + 1); 242 243 new_addr &= mask; 244 new_addr |= addr & ~mask; 245 246 /* XXX Fix up the inner checksums. */ 247 if (egress) 248 iph->daddr = new_addr; 249 else 250 iph->saddr = new_addr; 251 252 inet_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr, 253 0); 254 break; 255 } 256 default: 257 break; 258 } 259 260 out: 261 return action; 262 263 drop: 264 spin_lock(&p->tcf_lock); 265 p->tcf_qstats.drops++; 266 spin_unlock(&p->tcf_lock); 267 return TC_ACT_SHOT; 268 } 269 270 static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a, 271 int bind, int ref) 272 { 273 unsigned char *b = skb_tail_pointer(skb); 274 struct tcf_nat *p = a->priv; 275 struct tc_nat opt = { 276 .old_addr = p->old_addr, 277 .new_addr = p->new_addr, 278 .mask = p->mask, 279 .flags = p->flags, 280 281 .index = p->tcf_index, 282 .action = p->tcf_action, 283 .refcnt = p->tcf_refcnt - ref, 284 .bindcnt = p->tcf_bindcnt - bind, 285 }; 286 struct tcf_t t; 287 288 NLA_PUT(skb, TCA_NAT_PARMS, sizeof(opt), &opt); 289 t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); 290 t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); 291 t.expires = jiffies_to_clock_t(p->tcf_tm.expires); 292 NLA_PUT(skb, TCA_NAT_TM, sizeof(t), &t); 293 294 return skb->len; 295 296 nla_put_failure: 297 nlmsg_trim(skb, b); 298 return -1; 299 } 300 301 static struct tc_action_ops act_nat_ops = { 302 .kind = "nat", 303 .hinfo = &nat_hash_info, 304 .type = TCA_ACT_NAT, 305 .capab = TCA_CAP_NONE, 306 .owner = THIS_MODULE, 307 .act = tcf_nat, 308 .dump = tcf_nat_dump, 309 .cleanup = tcf_nat_cleanup, 310 .lookup = tcf_hash_search, 311 .init = tcf_nat_init, 312 .walk = tcf_generic_walker 313 }; 314 315 MODULE_DESCRIPTION("Stateless NAT actions"); 316 MODULE_LICENSE("GPL"); 317 318 static int __init nat_init_module(void) 319 { 320 return tcf_register_action(&act_nat_ops); 321 } 322 323 static void __exit nat_cleanup_module(void) 324 { 325 tcf_unregister_action(&act_nat_ops); 326 } 327 328 module_init(nat_init_module); 329 module_exit(nat_cleanup_module); 330