1 /* 2 * Stateless NAT actions 3 * 4 * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au> 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License as published by the Free 8 * Software Foundation; either version 2 of the License, or (at your option) 9 * any later version. 10 */ 11 12 #include <linux/errno.h> 13 #include <linux/init.h> 14 #include <linux/kernel.h> 15 #include <linux/module.h> 16 #include <linux/netfilter.h> 17 #include <linux/rtnetlink.h> 18 #include <linux/skbuff.h> 19 #include <linux/slab.h> 20 #include <linux/spinlock.h> 21 #include <linux/string.h> 22 #include <linux/tc_act/tc_nat.h> 23 #include <net/act_api.h> 24 #include <net/icmp.h> 25 #include <net/ip.h> 26 #include <net/netlink.h> 27 #include <net/tc_act/tc_nat.h> 28 #include <net/tcp.h> 29 #include <net/udp.h> 30 31 32 #define NAT_TAB_MASK 15 33 34 static struct tcf_hashinfo nat_hash_info; 35 36 static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = { 37 [TCA_NAT_PARMS] = { .len = sizeof(struct tc_nat) }, 38 }; 39 40 static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, 41 struct tc_action *a, int ovr, int bind) 42 { 43 struct nlattr *tb[TCA_NAT_MAX + 1]; 44 struct tc_nat *parm; 45 int ret = 0, err; 46 struct tcf_nat *p; 47 struct tcf_common *pc; 48 49 if (nla == NULL) 50 return -EINVAL; 51 52 err = nla_parse_nested(tb, TCA_NAT_MAX, nla, nat_policy); 53 if (err < 0) 54 return err; 55 56 if (tb[TCA_NAT_PARMS] == NULL) 57 return -EINVAL; 58 parm = nla_data(tb[TCA_NAT_PARMS]); 59 60 pc = tcf_hash_check(parm->index, a, bind); 61 if (!pc) { 62 pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind); 63 if (IS_ERR(pc)) 64 return PTR_ERR(pc); 65 ret = ACT_P_CREATED; 66 } else { 67 if (bind) 68 return 0; 69 tcf_hash_release(pc, bind, a->ops->hinfo); 70 if (!ovr) 71 return -EEXIST; 72 } 73 p = to_tcf_nat(pc); 74 75 spin_lock_bh(&p->tcf_lock); 76 p->old_addr = parm->old_addr; 77 p->new_addr = parm->new_addr; 78 p->mask = parm->mask; 79 p->flags = parm->flags; 80 81 p->tcf_action = parm->action; 82 spin_unlock_bh(&p->tcf_lock); 83 84 if (ret == ACT_P_CREATED) 85 tcf_hash_insert(pc, a->ops->hinfo); 86 87 return ret; 88 } 89 90 static int tcf_nat_cleanup(struct tc_action *a, int bind) 91 { 92 struct tcf_nat *p = a->priv; 93 94 return tcf_hash_release(&p->common, bind, &nat_hash_info); 95 } 96 97 static int tcf_nat(struct sk_buff *skb, const struct tc_action *a, 98 struct tcf_result *res) 99 { 100 struct tcf_nat *p = a->priv; 101 struct iphdr *iph; 102 __be32 old_addr; 103 __be32 new_addr; 104 __be32 mask; 105 __be32 addr; 106 int egress; 107 int action; 108 int ihl; 109 int noff; 110 111 spin_lock(&p->tcf_lock); 112 113 p->tcf_tm.lastuse = jiffies; 114 old_addr = p->old_addr; 115 new_addr = p->new_addr; 116 mask = p->mask; 117 egress = p->flags & TCA_NAT_FLAG_EGRESS; 118 action = p->tcf_action; 119 120 bstats_update(&p->tcf_bstats, skb); 121 122 spin_unlock(&p->tcf_lock); 123 124 if (unlikely(action == TC_ACT_SHOT)) 125 goto drop; 126 127 noff = skb_network_offset(skb); 128 if (!pskb_may_pull(skb, sizeof(*iph) + noff)) 129 goto drop; 130 131 iph = ip_hdr(skb); 132 133 if (egress) 134 addr = iph->saddr; 135 else 136 addr = iph->daddr; 137 138 if (!((old_addr ^ addr) & mask)) { 139 if (skb_cloned(skb) && 140 !skb_clone_writable(skb, sizeof(*iph) + noff) && 141 pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) 142 goto drop; 143 144 new_addr &= mask; 145 new_addr |= addr & ~mask; 146 147 /* Rewrite IP header */ 148 iph = ip_hdr(skb); 149 if (egress) 150 iph->saddr = new_addr; 151 else 152 iph->daddr = new_addr; 153 154 csum_replace4(&iph->check, addr, new_addr); 155 } else if ((iph->frag_off & htons(IP_OFFSET)) || 156 iph->protocol != IPPROTO_ICMP) { 157 goto out; 158 } 159 160 ihl = iph->ihl * 4; 161 162 /* It would be nice to share code with stateful NAT. */ 163 switch (iph->frag_off & htons(IP_OFFSET) ? 0 : iph->protocol) { 164 case IPPROTO_TCP: 165 { 166 struct tcphdr *tcph; 167 168 if (!pskb_may_pull(skb, ihl + sizeof(*tcph) + noff) || 169 (skb_cloned(skb) && 170 !skb_clone_writable(skb, ihl + sizeof(*tcph) + noff) && 171 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) 172 goto drop; 173 174 tcph = (void *)(skb_network_header(skb) + ihl); 175 inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, 1); 176 break; 177 } 178 case IPPROTO_UDP: 179 { 180 struct udphdr *udph; 181 182 if (!pskb_may_pull(skb, ihl + sizeof(*udph) + noff) || 183 (skb_cloned(skb) && 184 !skb_clone_writable(skb, ihl + sizeof(*udph) + noff) && 185 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) 186 goto drop; 187 188 udph = (void *)(skb_network_header(skb) + ihl); 189 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { 190 inet_proto_csum_replace4(&udph->check, skb, addr, 191 new_addr, 1); 192 if (!udph->check) 193 udph->check = CSUM_MANGLED_0; 194 } 195 break; 196 } 197 case IPPROTO_ICMP: 198 { 199 struct icmphdr *icmph; 200 201 if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + noff)) 202 goto drop; 203 204 icmph = (void *)(skb_network_header(skb) + ihl); 205 206 if ((icmph->type != ICMP_DEST_UNREACH) && 207 (icmph->type != ICMP_TIME_EXCEEDED) && 208 (icmph->type != ICMP_PARAMETERPROB)) 209 break; 210 211 if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph) + 212 noff)) 213 goto drop; 214 215 icmph = (void *)(skb_network_header(skb) + ihl); 216 iph = (void *)(icmph + 1); 217 if (egress) 218 addr = iph->daddr; 219 else 220 addr = iph->saddr; 221 222 if ((old_addr ^ addr) & mask) 223 break; 224 225 if (skb_cloned(skb) && 226 !skb_clone_writable(skb, ihl + sizeof(*icmph) + 227 sizeof(*iph) + noff) && 228 pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) 229 goto drop; 230 231 icmph = (void *)(skb_network_header(skb) + ihl); 232 iph = (void *)(icmph + 1); 233 234 new_addr &= mask; 235 new_addr |= addr & ~mask; 236 237 /* XXX Fix up the inner checksums. */ 238 if (egress) 239 iph->daddr = new_addr; 240 else 241 iph->saddr = new_addr; 242 243 inet_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr, 244 0); 245 break; 246 } 247 default: 248 break; 249 } 250 251 out: 252 return action; 253 254 drop: 255 spin_lock(&p->tcf_lock); 256 p->tcf_qstats.drops++; 257 spin_unlock(&p->tcf_lock); 258 return TC_ACT_SHOT; 259 } 260 261 static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a, 262 int bind, int ref) 263 { 264 unsigned char *b = skb_tail_pointer(skb); 265 struct tcf_nat *p = a->priv; 266 struct tc_nat opt = { 267 .old_addr = p->old_addr, 268 .new_addr = p->new_addr, 269 .mask = p->mask, 270 .flags = p->flags, 271 272 .index = p->tcf_index, 273 .action = p->tcf_action, 274 .refcnt = p->tcf_refcnt - ref, 275 .bindcnt = p->tcf_bindcnt - bind, 276 }; 277 struct tcf_t t; 278 279 if (nla_put(skb, TCA_NAT_PARMS, sizeof(opt), &opt)) 280 goto nla_put_failure; 281 t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); 282 t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); 283 t.expires = jiffies_to_clock_t(p->tcf_tm.expires); 284 if (nla_put(skb, TCA_NAT_TM, sizeof(t), &t)) 285 goto nla_put_failure; 286 287 return skb->len; 288 289 nla_put_failure: 290 nlmsg_trim(skb, b); 291 return -1; 292 } 293 294 static struct tc_action_ops act_nat_ops = { 295 .kind = "nat", 296 .hinfo = &nat_hash_info, 297 .type = TCA_ACT_NAT, 298 .owner = THIS_MODULE, 299 .act = tcf_nat, 300 .dump = tcf_nat_dump, 301 .cleanup = tcf_nat_cleanup, 302 .init = tcf_nat_init, 303 }; 304 305 MODULE_DESCRIPTION("Stateless NAT actions"); 306 MODULE_LICENSE("GPL"); 307 308 static int __init nat_init_module(void) 309 { 310 int err = tcf_hashinfo_init(&nat_hash_info, NAT_TAB_MASK); 311 if (err) 312 return err; 313 return tcf_register_action(&act_nat_ops); 314 } 315 316 static void __exit nat_cleanup_module(void) 317 { 318 tcf_unregister_action(&act_nat_ops); 319 tcf_hashinfo_destroy(&nat_hash_info); 320 } 321 322 module_init(nat_init_module); 323 module_exit(nat_cleanup_module); 324