1b4219952SHerbert Xu /* 2b4219952SHerbert Xu * Stateless NAT actions 3b4219952SHerbert Xu * 4b4219952SHerbert Xu * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au> 5b4219952SHerbert Xu * 6b4219952SHerbert Xu * This program is free software; you can redistribute it and/or modify it 7b4219952SHerbert Xu * under the terms of the GNU General Public License as published by the Free 8b4219952SHerbert Xu * Software Foundation; either version 2 of the License, or (at your option) 9b4219952SHerbert Xu * any later version. 10b4219952SHerbert Xu */ 11b4219952SHerbert Xu 12b4219952SHerbert Xu #include <linux/errno.h> 13b4219952SHerbert Xu #include <linux/init.h> 14b4219952SHerbert Xu #include <linux/kernel.h> 15b4219952SHerbert Xu #include <linux/module.h> 16b4219952SHerbert Xu #include <linux/netfilter.h> 17b4219952SHerbert Xu #include <linux/rtnetlink.h> 18b4219952SHerbert Xu #include <linux/skbuff.h> 19b4219952SHerbert Xu #include <linux/slab.h> 20b4219952SHerbert Xu #include <linux/spinlock.h> 21b4219952SHerbert Xu #include <linux/string.h> 22b4219952SHerbert Xu #include <linux/tc_act/tc_nat.h> 23b4219952SHerbert Xu #include <net/act_api.h> 24b4219952SHerbert Xu #include <net/icmp.h> 25b4219952SHerbert Xu #include <net/ip.h> 26b4219952SHerbert Xu #include <net/netlink.h> 27b4219952SHerbert Xu #include <net/tc_act/tc_nat.h> 28b4219952SHerbert Xu #include <net/tcp.h> 29b4219952SHerbert Xu #include <net/udp.h> 30b4219952SHerbert Xu 31b4219952SHerbert Xu 32b4219952SHerbert Xu #define NAT_TAB_MASK 15 33b4219952SHerbert Xu static struct tcf_common *tcf_nat_ht[NAT_TAB_MASK + 1]; 34b4219952SHerbert Xu static u32 nat_idx_gen; 35b4219952SHerbert Xu static DEFINE_RWLOCK(nat_lock); 36b4219952SHerbert Xu 37b4219952SHerbert Xu static struct tcf_hashinfo nat_hash_info = { 38b4219952SHerbert Xu .htab = tcf_nat_ht, 39b4219952SHerbert Xu .hmask = NAT_TAB_MASK, 40b4219952SHerbert Xu .lock = &nat_lock, 41b4219952SHerbert Xu }; 42b4219952SHerbert Xu 4353b2bf3fSPatrick McHardy static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = { 4453b2bf3fSPatrick McHardy [TCA_NAT_PARMS] = { .len = sizeof(struct tc_nat) }, 4553b2bf3fSPatrick McHardy }; 4653b2bf3fSPatrick McHardy 477ba699c6SPatrick McHardy static int tcf_nat_init(struct nlattr *nla, struct nlattr *est, 48b4219952SHerbert Xu struct tc_action *a, int ovr, int bind) 49b4219952SHerbert Xu { 507ba699c6SPatrick McHardy struct nlattr *tb[TCA_NAT_MAX + 1]; 51b4219952SHerbert Xu struct tc_nat *parm; 52cee63723SPatrick McHardy int ret = 0, err; 53b4219952SHerbert Xu struct tcf_nat *p; 54b4219952SHerbert Xu struct tcf_common *pc; 55b4219952SHerbert Xu 56cee63723SPatrick McHardy if (nla == NULL) 57b4219952SHerbert Xu return -EINVAL; 58b4219952SHerbert Xu 5953b2bf3fSPatrick McHardy err = nla_parse_nested(tb, TCA_NAT_MAX, nla, nat_policy); 60cee63723SPatrick McHardy if (err < 0) 61cee63723SPatrick McHardy return err; 62cee63723SPatrick McHardy 6353b2bf3fSPatrick McHardy if (tb[TCA_NAT_PARMS] == NULL) 64b4219952SHerbert Xu return -EINVAL; 657ba699c6SPatrick McHardy parm = nla_data(tb[TCA_NAT_PARMS]); 66b4219952SHerbert Xu 67b4219952SHerbert Xu pc = tcf_hash_check(parm->index, a, bind, &nat_hash_info); 68b4219952SHerbert Xu if (!pc) { 69b4219952SHerbert Xu pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, 70b4219952SHerbert Xu &nat_idx_gen, &nat_hash_info); 710e991ec6SStephen Hemminger if (IS_ERR(pc)) 720e991ec6SStephen Hemminger return PTR_ERR(pc); 73b4219952SHerbert Xu p = to_tcf_nat(pc); 74b4219952SHerbert Xu ret = ACT_P_CREATED; 75b4219952SHerbert Xu } else { 76b4219952SHerbert Xu p = to_tcf_nat(pc); 77b4219952SHerbert Xu if (!ovr) { 78b4219952SHerbert Xu tcf_hash_release(pc, bind, &nat_hash_info); 79b4219952SHerbert Xu return -EEXIST; 80b4219952SHerbert Xu } 81b4219952SHerbert Xu } 82b4219952SHerbert Xu 83b4219952SHerbert Xu spin_lock_bh(&p->tcf_lock); 84b4219952SHerbert Xu p->old_addr = parm->old_addr; 85b4219952SHerbert Xu p->new_addr = parm->new_addr; 86b4219952SHerbert Xu p->mask = parm->mask; 87b4219952SHerbert Xu p->flags = parm->flags; 88b4219952SHerbert Xu 89b4219952SHerbert Xu p->tcf_action = parm->action; 90b4219952SHerbert Xu spin_unlock_bh(&p->tcf_lock); 91b4219952SHerbert Xu 92b4219952SHerbert Xu if (ret == ACT_P_CREATED) 93b4219952SHerbert Xu tcf_hash_insert(pc, &nat_hash_info); 94b4219952SHerbert Xu 95b4219952SHerbert Xu return ret; 96b4219952SHerbert Xu } 97b4219952SHerbert Xu 98b4219952SHerbert Xu static int tcf_nat_cleanup(struct tc_action *a, int bind) 99b4219952SHerbert Xu { 100b4219952SHerbert Xu struct tcf_nat *p = a->priv; 101b4219952SHerbert Xu 102b4219952SHerbert Xu return tcf_hash_release(&p->common, bind, &nat_hash_info); 103b4219952SHerbert Xu } 104b4219952SHerbert Xu 105b4219952SHerbert Xu static int tcf_nat(struct sk_buff *skb, struct tc_action *a, 106b4219952SHerbert Xu struct tcf_result *res) 107b4219952SHerbert Xu { 108b4219952SHerbert Xu struct tcf_nat *p = a->priv; 109b4219952SHerbert Xu struct iphdr *iph; 110b4219952SHerbert Xu __be32 old_addr; 111b4219952SHerbert Xu __be32 new_addr; 112b4219952SHerbert Xu __be32 mask; 113b4219952SHerbert Xu __be32 addr; 114b4219952SHerbert Xu int egress; 115b4219952SHerbert Xu int action; 116b4219952SHerbert Xu int ihl; 117b4219952SHerbert Xu 118b4219952SHerbert Xu spin_lock(&p->tcf_lock); 119b4219952SHerbert Xu 120b4219952SHerbert Xu p->tcf_tm.lastuse = jiffies; 121b4219952SHerbert Xu old_addr = p->old_addr; 122b4219952SHerbert Xu new_addr = p->new_addr; 123b4219952SHerbert Xu mask = p->mask; 124b4219952SHerbert Xu egress = p->flags & TCA_NAT_FLAG_EGRESS; 125b4219952SHerbert Xu action = p->tcf_action; 126b4219952SHerbert Xu 1270abf77e5SJussi Kivilinna p->tcf_bstats.bytes += qdisc_pkt_len(skb); 128b4219952SHerbert Xu p->tcf_bstats.packets++; 129b4219952SHerbert Xu 130b4219952SHerbert Xu spin_unlock(&p->tcf_lock); 131b4219952SHerbert Xu 132b4219952SHerbert Xu if (unlikely(action == TC_ACT_SHOT)) 133b4219952SHerbert Xu goto drop; 134b4219952SHerbert Xu 135b4219952SHerbert Xu if (!pskb_may_pull(skb, sizeof(*iph))) 136b4219952SHerbert Xu goto drop; 137b4219952SHerbert Xu 138b4219952SHerbert Xu iph = ip_hdr(skb); 139b4219952SHerbert Xu 140b4219952SHerbert Xu if (egress) 141b4219952SHerbert Xu addr = iph->saddr; 142b4219952SHerbert Xu else 143b4219952SHerbert Xu addr = iph->daddr; 144b4219952SHerbert Xu 145b4219952SHerbert Xu if (!((old_addr ^ addr) & mask)) { 146b4219952SHerbert Xu if (skb_cloned(skb) && 147b4219952SHerbert Xu !skb_clone_writable(skb, sizeof(*iph)) && 148b4219952SHerbert Xu pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) 149b4219952SHerbert Xu goto drop; 150b4219952SHerbert Xu 151b4219952SHerbert Xu new_addr &= mask; 152b4219952SHerbert Xu new_addr |= addr & ~mask; 153b4219952SHerbert Xu 154b4219952SHerbert Xu /* Rewrite IP header */ 155b4219952SHerbert Xu iph = ip_hdr(skb); 156b4219952SHerbert Xu if (egress) 157b4219952SHerbert Xu iph->saddr = new_addr; 158b4219952SHerbert Xu else 159b4219952SHerbert Xu iph->daddr = new_addr; 160b4219952SHerbert Xu 161be0ea7d5SPatrick McHardy csum_replace4(&iph->check, addr, new_addr); 16233c29ddeSChangli Gao } else if ((iph->frag_off & htons(IP_OFFSET)) || 16333c29ddeSChangli Gao iph->protocol != IPPROTO_ICMP) { 16433c29ddeSChangli Gao goto out; 165b4219952SHerbert Xu } 166b4219952SHerbert Xu 167b4219952SHerbert Xu ihl = iph->ihl * 4; 168b4219952SHerbert Xu 169b4219952SHerbert Xu /* It would be nice to share code with stateful NAT. */ 170b4219952SHerbert Xu switch (iph->frag_off & htons(IP_OFFSET) ? 0 : iph->protocol) { 171b4219952SHerbert Xu case IPPROTO_TCP: 172b4219952SHerbert Xu { 173b4219952SHerbert Xu struct tcphdr *tcph; 174b4219952SHerbert Xu 175b4219952SHerbert Xu if (!pskb_may_pull(skb, ihl + sizeof(*tcph)) || 176b4219952SHerbert Xu (skb_cloned(skb) && 177b4219952SHerbert Xu !skb_clone_writable(skb, ihl + sizeof(*tcph)) && 178b4219952SHerbert Xu pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) 179b4219952SHerbert Xu goto drop; 180b4219952SHerbert Xu 181b4219952SHerbert Xu tcph = (void *)(skb_network_header(skb) + ihl); 182be0ea7d5SPatrick McHardy inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, 1); 183b4219952SHerbert Xu break; 184b4219952SHerbert Xu } 185b4219952SHerbert Xu case IPPROTO_UDP: 186b4219952SHerbert Xu { 187b4219952SHerbert Xu struct udphdr *udph; 188b4219952SHerbert Xu 189b4219952SHerbert Xu if (!pskb_may_pull(skb, ihl + sizeof(*udph)) || 190b4219952SHerbert Xu (skb_cloned(skb) && 191b4219952SHerbert Xu !skb_clone_writable(skb, ihl + sizeof(*udph)) && 192b4219952SHerbert Xu pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) 193b4219952SHerbert Xu goto drop; 194b4219952SHerbert Xu 195b4219952SHerbert Xu udph = (void *)(skb_network_header(skb) + ihl); 196b4219952SHerbert Xu if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { 197be0ea7d5SPatrick McHardy inet_proto_csum_replace4(&udph->check, skb, addr, 198b4219952SHerbert Xu new_addr, 1); 199b4219952SHerbert Xu if (!udph->check) 200b4219952SHerbert Xu udph->check = CSUM_MANGLED_0; 201b4219952SHerbert Xu } 202b4219952SHerbert Xu break; 203b4219952SHerbert Xu } 204b4219952SHerbert Xu case IPPROTO_ICMP: 205b4219952SHerbert Xu { 206b4219952SHerbert Xu struct icmphdr *icmph; 207b4219952SHerbert Xu 20870c2efa5SChangli Gao if (!pskb_may_pull(skb, ihl + sizeof(*icmph))) 209b4219952SHerbert Xu goto drop; 210b4219952SHerbert Xu 211b4219952SHerbert Xu icmph = (void *)(skb_network_header(skb) + ihl); 212b4219952SHerbert Xu 213b4219952SHerbert Xu if ((icmph->type != ICMP_DEST_UNREACH) && 214b4219952SHerbert Xu (icmph->type != ICMP_TIME_EXCEEDED) && 215b4219952SHerbert Xu (icmph->type != ICMP_PARAMETERPROB)) 216b4219952SHerbert Xu break; 217b4219952SHerbert Xu 21870c2efa5SChangli Gao if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph))) 21970c2efa5SChangli Gao goto drop; 22070c2efa5SChangli Gao 221*072d79a3SChangli Gao icmph = (void *)(skb_network_header(skb) + ihl); 222b4219952SHerbert Xu iph = (void *)(icmph + 1); 223b4219952SHerbert Xu if (egress) 224b4219952SHerbert Xu addr = iph->daddr; 225b4219952SHerbert Xu else 226b4219952SHerbert Xu addr = iph->saddr; 227b4219952SHerbert Xu 228b4219952SHerbert Xu if ((old_addr ^ addr) & mask) 229b4219952SHerbert Xu break; 230b4219952SHerbert Xu 231b4219952SHerbert Xu if (skb_cloned(skb) && 232b4219952SHerbert Xu !skb_clone_writable(skb, 233b4219952SHerbert Xu ihl + sizeof(*icmph) + sizeof(*iph)) && 234b4219952SHerbert Xu pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) 235b4219952SHerbert Xu goto drop; 236b4219952SHerbert Xu 237b4219952SHerbert Xu icmph = (void *)(skb_network_header(skb) + ihl); 238b4219952SHerbert Xu iph = (void *)(icmph + 1); 239b4219952SHerbert Xu 240b4219952SHerbert Xu new_addr &= mask; 241b4219952SHerbert Xu new_addr |= addr & ~mask; 242b4219952SHerbert Xu 243b4219952SHerbert Xu /* XXX Fix up the inner checksums. */ 244b4219952SHerbert Xu if (egress) 245b4219952SHerbert Xu iph->daddr = new_addr; 246b4219952SHerbert Xu else 247b4219952SHerbert Xu iph->saddr = new_addr; 248b4219952SHerbert Xu 249be0ea7d5SPatrick McHardy inet_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr, 250b4219952SHerbert Xu 1); 251b4219952SHerbert Xu break; 252b4219952SHerbert Xu } 253b4219952SHerbert Xu default: 254b4219952SHerbert Xu break; 255b4219952SHerbert Xu } 256b4219952SHerbert Xu 25733c29ddeSChangli Gao out: 258b4219952SHerbert Xu return action; 259b4219952SHerbert Xu 260b4219952SHerbert Xu drop: 261b4219952SHerbert Xu spin_lock(&p->tcf_lock); 262b4219952SHerbert Xu p->tcf_qstats.drops++; 263b4219952SHerbert Xu spin_unlock(&p->tcf_lock); 264b4219952SHerbert Xu return TC_ACT_SHOT; 265b4219952SHerbert Xu } 266b4219952SHerbert Xu 267b4219952SHerbert Xu static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a, 268b4219952SHerbert Xu int bind, int ref) 269b4219952SHerbert Xu { 270b4219952SHerbert Xu unsigned char *b = skb_tail_pointer(skb); 271b4219952SHerbert Xu struct tcf_nat *p = a->priv; 272b4219952SHerbert Xu struct tc_nat *opt; 273b4219952SHerbert Xu struct tcf_t t; 274b4219952SHerbert Xu int s; 275b4219952SHerbert Xu 276b4219952SHerbert Xu s = sizeof(*opt); 277b4219952SHerbert Xu 278b4219952SHerbert Xu /* netlink spinlocks held above us - must use ATOMIC */ 279b4219952SHerbert Xu opt = kzalloc(s, GFP_ATOMIC); 280b4219952SHerbert Xu if (unlikely(!opt)) 281b4219952SHerbert Xu return -ENOBUFS; 282b4219952SHerbert Xu 283b4219952SHerbert Xu opt->old_addr = p->old_addr; 284b4219952SHerbert Xu opt->new_addr = p->new_addr; 285b4219952SHerbert Xu opt->mask = p->mask; 286b4219952SHerbert Xu opt->flags = p->flags; 287b4219952SHerbert Xu 288b4219952SHerbert Xu opt->index = p->tcf_index; 289b4219952SHerbert Xu opt->action = p->tcf_action; 290b4219952SHerbert Xu opt->refcnt = p->tcf_refcnt - ref; 291b4219952SHerbert Xu opt->bindcnt = p->tcf_bindcnt - bind; 292b4219952SHerbert Xu 2937ba699c6SPatrick McHardy NLA_PUT(skb, TCA_NAT_PARMS, s, opt); 294b4219952SHerbert Xu t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); 295b4219952SHerbert Xu t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); 296b4219952SHerbert Xu t.expires = jiffies_to_clock_t(p->tcf_tm.expires); 2977ba699c6SPatrick McHardy NLA_PUT(skb, TCA_NAT_TM, sizeof(t), &t); 298b4219952SHerbert Xu 299b4219952SHerbert Xu kfree(opt); 300b4219952SHerbert Xu 301b4219952SHerbert Xu return skb->len; 302b4219952SHerbert Xu 3037ba699c6SPatrick McHardy nla_put_failure: 304b4219952SHerbert Xu nlmsg_trim(skb, b); 305b4219952SHerbert Xu kfree(opt); 306b4219952SHerbert Xu return -1; 307b4219952SHerbert Xu } 308b4219952SHerbert Xu 309b4219952SHerbert Xu static struct tc_action_ops act_nat_ops = { 310b4219952SHerbert Xu .kind = "nat", 311b4219952SHerbert Xu .hinfo = &nat_hash_info, 312b4219952SHerbert Xu .type = TCA_ACT_NAT, 313b4219952SHerbert Xu .capab = TCA_CAP_NONE, 314b4219952SHerbert Xu .owner = THIS_MODULE, 315b4219952SHerbert Xu .act = tcf_nat, 316b4219952SHerbert Xu .dump = tcf_nat_dump, 317b4219952SHerbert Xu .cleanup = tcf_nat_cleanup, 318b4219952SHerbert Xu .lookup = tcf_hash_search, 319b4219952SHerbert Xu .init = tcf_nat_init, 320b4219952SHerbert Xu .walk = tcf_generic_walker 321b4219952SHerbert Xu }; 322b4219952SHerbert Xu 323b4219952SHerbert Xu MODULE_DESCRIPTION("Stateless NAT actions"); 324b4219952SHerbert Xu MODULE_LICENSE("GPL"); 325b4219952SHerbert Xu 326b4219952SHerbert Xu static int __init nat_init_module(void) 327b4219952SHerbert Xu { 328b4219952SHerbert Xu return tcf_register_action(&act_nat_ops); 329b4219952SHerbert Xu } 330b4219952SHerbert Xu 331b4219952SHerbert Xu static void __exit nat_cleanup_module(void) 332b4219952SHerbert Xu { 333b4219952SHerbert Xu tcf_unregister_action(&act_nat_ops); 334b4219952SHerbert Xu } 335b4219952SHerbert Xu 336b4219952SHerbert Xu module_init(nat_init_module); 337b4219952SHerbert Xu module_exit(nat_cleanup_module); 338