// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Stateless NAT actions
 *
 * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au>
 */

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/tc_act/tc_nat.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
#include <net/icmp.h>
#include <net/ip.h>
#include <net/netlink.h>
#include <net/tc_act/tc_nat.h>
#include <net/tcp.h>
#include <net/udp.h>


/* Per-network-namespace id; filled in through nat_net_ops.id below. */
static unsigned int nat_net_id;
/* Forward declaration: tcf_nat_init() needs the ops before they are defined. */
static struct tc_action_ops act_nat_ops;

/*
 * Attribute policy used when parsing the nested NAT attributes.
 * TCA_NAT_PARMS is constrained by the size of struct tc_nat.
 */
static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
	[TCA_NAT_PARMS]	= { .len = sizeof(struct tc_nat) },
};

/*
 * tcf_nat_init - create a NAT action or update an existing one.
 *
 * Parses the nested attributes in @nla, requires TCA_NAT_PARMS, and uses the
 * index found in the parameters to look up or allocate the action:
 *
 *  - tcf_idr_check_alloc() == 0: a new action is created here and
 *    ACT_P_CREATED is returned on success;
 *  - tcf_idr_check_alloc()  > 0: the action is treated as already existing;
 *    with @bind set the function returns 0 immediately, without @ovr it
 *    releases the action and returns -EEXIST, otherwise the parameters of
 *    the existing action are overwritten;
 *  - tcf_idr_check_alloc()  < 0: the error is returned as is.
 *
 * The control action from the parameters is validated with
 * tcf_action_check_ctrlact() before anything is written to the action.
 *
 * Returns ACT_P_CREATED for a newly created action, 0 for an existing one,
 * or a negative errno on failure.  @rtnl_held is not used by this function.
 *
 * NOTE(review): @extack is handed to tcf_action_check_ctrlact() but NULL is
 * passed to nla_parse_nested_deprecated(), so attribute parse errors carry
 * no extended ack here - confirm whether that is intentional.
 */
static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
			struct tc_action **a, int ovr, int bind,
			bool rtnl_held,	struct tcf_proto *tp,
			struct netlink_ext_ack *extack)
{
	struct tc_action_net *tn = net_generic(net, nat_net_id);
	struct nlattr *tb[TCA_NAT_MAX + 1];
	struct tcf_chain *goto_ch = NULL;
	struct tc_nat *parm;
	int ret = 0, err;
	struct tcf_nat *p;
	u32 index;

	if (nla == NULL)
		return -EINVAL;

	err = nla_parse_nested_deprecated(tb, TCA_NAT_MAX, nla, nat_policy,
					  NULL);
	if (err < 0)
		return err;

	if (tb[TCA_NAT_PARMS] == NULL)
		return -EINVAL;
	parm = nla_data(tb[TCA_NAT_PARMS]);
	index = parm->index;
	err = tcf_idr_check_alloc(tn, &index, a, bind);
	if (!err) {
		ret = tcf_idr_create(tn, index, est, a,
				     &act_nat_ops, bind, false);
		if (ret) {
			/* Creation failed: undo the index handling done above. */
			tcf_idr_cleanup(tn, index);
			return ret;
		}
		ret = ACT_P_CREATED;
	} else if (err > 0) {
		if (bind)
			return 0;
		if (!ovr) {
			tcf_idr_release(*a, bind);
			return -EEXIST;
		}
	} else {
		return err;
	}
	err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
	if (err < 0)
		goto release_idr;
	p = to_tcf_nat(*a);

	/* Update all NAT parameters and the control action under tcf_lock. */
	spin_lock_bh(&p->tcf_lock);
	p->old_addr = parm->old_addr;
	p->new_addr = parm->new_addr;
	p->mask = parm->mask;
	p->flags = parm->flags;

	/* goto_ch now holds whatever tcf_action_set_ctrlact() handed back. */
	goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
	spin_unlock_bh(&p->tcf_lock);
	if (goto_ch)
		tcf_chain_put_by_act(goto_ch);

	if (ret == ACT_P_CREATED)
		tcf_idr_insert(tn, *a);

	return ret;
release_idr:
	tcf_idr_release(*a, bind);
	return err;
}

/*
 * tcf_nat_act - apply the stateless address translation to one packet.
 *
 * Takes a snapshot of the action parameters under tcf_lock (also updating
 * the last-use time and the byte/packet stats), then:
 *
 *  1. Picks the source address (TCA_NAT_FLAG_EGRESS set) or the destination
 *     address (flag clear) of the IP header.
 *  2. If that address matches old_addr under mask, rewrites it to
 *     (new_addr & mask) | (addr & ~mask) and fixes the IP header checksum.
 *     If it does not match, processing only continues for ICMP packets
 *     whose fragment-offset bits are zero; everything else is returned
 *     untouched.
 *  3. Adjusts the transport checksum for TCP and UDP, or - for selected ICMP
 *     error types - rewrites the address in the IP header that follows the
 *     ICMP header and fixes the ICMP checksum.
 *
 * Returns the configured control action, or TC_ACT_SHOT (after counting a
 * drop) when the configured action is TC_ACT_SHOT or when the packet cannot
 * be pulled or made writable.
 */
static int tcf_nat_act(struct sk_buff *skb, const struct tc_action *a,
		       struct tcf_result *res)
{
	struct tcf_nat *p = to_tcf_nat(a);
	struct iphdr *iph;
	__be32 old_addr;
	__be32 new_addr;
	__be32 mask;
	__be32 addr;
	int egress;
	int action;
	int ihl;
	int noff;

	spin_lock(&p->tcf_lock);

	tcf_lastuse_update(&p->tcf_tm);
	old_addr = p->old_addr;
	new_addr = p->new_addr;
	mask = p->mask;
	egress = p->flags & TCA_NAT_FLAG_EGRESS;
	action = p->tcf_action;

	bstats_update(&p->tcf_bstats, skb);

	spin_unlock(&p->tcf_lock);

	if (unlikely(action == TC_ACT_SHOT))
		goto drop;

	/* All length checks below are relative to the network header offset. */
	noff = skb_network_offset(skb);
	if (!pskb_may_pull(skb, sizeof(*iph) + noff))
		goto drop;

	iph = ip_hdr(skb);

	if (egress)
		addr = iph->saddr;
	else
		addr = iph->daddr;

	if (!((old_addr ^ addr) & mask)) {
		if (skb_try_make_writable(skb, sizeof(*iph) + noff))
			goto drop;

		/* Keep the bits outside the mask from the original address. */
		new_addr &= mask;
		new_addr |= addr & ~mask;

		/* Rewrite IP header */
		/*
		 * The header pointer is fetched again after making the skb
		 * writable (presumably because that call may move the data).
		 */
		iph = ip_hdr(skb);
		if (egress)
			iph->saddr = new_addr;
		else
			iph->daddr = new_addr;

		csum_replace4(&iph->check, addr, new_addr);
	} else if ((iph->frag_off & htons(IP_OFFSET)) ||
		   iph->protocol != IPPROTO_ICMP) {
		/*
		 * No match on the outer address: only ICMP packets with a
		 * zero fragment offset are inspected further.
		 */
		goto out;
	}

	ihl = iph->ihl * 4;

	/* It would be nice to share code with stateful NAT. */
	/* A non-zero fragment offset selects 0, which lands in the default case. */
	switch (iph->frag_off & htons(IP_OFFSET) ? 0 : iph->protocol) {
	case IPPROTO_TCP:
	{
		struct tcphdr *tcph;

		if (!pskb_may_pull(skb, ihl + sizeof(*tcph) + noff) ||
		    skb_try_make_writable(skb, ihl + sizeof(*tcph) + noff))
			goto drop;

		tcph = (void *)(skb_network_header(skb) + ihl);
		inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr,
					 true);
		break;
	}
	case IPPROTO_UDP:
	{
		struct udphdr *udph;

		if (!pskb_may_pull(skb, ihl + sizeof(*udph) + noff) ||
		    skb_try_make_writable(skb, ihl + sizeof(*udph) + noff))
			goto drop;

		udph = (void *)(skb_network_header(skb) + ihl);
		/*
		 * The checksum is only touched when it is non-zero or when
		 * ip_summed is CHECKSUM_PARTIAL.  A result of zero is stored
		 * as CSUM_MANGLED_0 (presumably because a zero UDP checksum
		 * means "no checksum" - confirm).
		 */
		if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
			inet_proto_csum_replace4(&udph->check, skb, addr,
						 new_addr, true);
			if (!udph->check)
				udph->check = CSUM_MANGLED_0;
		}
		break;
	}
	case IPPROTO_ICMP:
	{
		struct icmphdr *icmph;

		if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + noff))
			goto drop;

		icmph = (void *)(skb_network_header(skb) + ihl);

		/* Only these three ICMP types are processed further. */
		if ((icmph->type != ICMP_DEST_UNREACH) &&
		    (icmph->type != ICMP_TIME_EXCEEDED) &&
		    (icmph->type != ICMP_PARAMETERPROB))
			break;

		if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph) +
					noff))
			goto drop;

		/* From here on iph refers to the IP header after the ICMP header. */
		icmph = (void *)(skb_network_header(skb) + ihl);
		iph = (void *)(icmph + 1);
		/*
		 * The address choice is the reverse of the outer header:
		 * daddr on egress, saddr otherwise (presumably because the
		 * embedded packet travelled in the opposite direction).
		 */
		if (egress)
			addr = iph->daddr;
		else
			addr = iph->saddr;

		if ((old_addr ^ addr) & mask)
			break;

		if (skb_try_make_writable(skb, ihl + sizeof(*icmph) +
					  sizeof(*iph) + noff))
			goto drop;

		icmph = (void *)(skb_network_header(skb) + ihl);
		iph = (void *)(icmph + 1);

		new_addr &= mask;
		new_addr |= addr & ~mask;

		/* XXX Fix up the inner checksums. */
		if (egress)
			iph->daddr = new_addr;
		else
			iph->saddr = new_addr;

		/* Unlike TCP/UDP above, the last argument is false here. */
		inet_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr,
					 false);
		break;
	}
	default:
		break;
	}

out:
	return action;

drop:
	spin_lock(&p->tcf_lock);
	p->tcf_qstats.drops++;
	spin_unlock(&p->tcf_lock);
	return TC_ACT_SHOT;
}

/*
 * tcf_nat_dump - emit the action's configuration and timestamps into @skb.
 *
 * Writes a struct tc_nat as TCA_NAT_PARMS and a struct tcf_t as TCA_NAT_TM.
 * The reported reference and bind counts are reduced by @ref and @bind.
 *
 * Returns skb->len on success.  On failure the message is trimmed back to
 * the tail position recorded on entry and -1 is returned.
 */
static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a,
			int bind, int ref)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tcf_nat *p = to_tcf_nat(a);
	struct tc_nat opt = {
		.index    = p->tcf_index,
		.refcnt   = refcount_read(&p->tcf_refcnt) - ref,
		.bindcnt  = atomic_read(&p->tcf_bindcnt) - bind,
	};
	struct tcf_t t;

	/* Read the NAT parameters and the timestamps under tcf_lock. */
	spin_lock_bh(&p->tcf_lock);
	opt.old_addr = p->old_addr;
	opt.new_addr = p->new_addr;
	opt.mask = p->mask;
	opt.flags = p->flags;
	opt.action = p->tcf_action;

	if (nla_put(skb, TCA_NAT_PARMS, sizeof(opt), &opt))
		goto nla_put_failure;

	tcf_tm_dump(&t, &p->tcf_tm);
	if (nla_put_64bit(skb, TCA_NAT_TM, sizeof(t), &t, TCA_NAT_PAD))
		goto nla_put_failure;
	spin_unlock_bh(&p->tcf_lock);

	return skb->len;

nla_put_failure:
	spin_unlock_bh(&p->tcf_lock);
	nlmsg_trim(skb, b);
	return -1;
}

/*
 * tcf_nat_walker - forward a walk request to tcf_generic_walker() using the
 * NAT action table of @net.
 */
static int tcf_nat_walker(struct net *net, struct sk_buff *skb,
			  struct netlink_callback *cb, int type,
			  const struct tc_action_ops *ops,
			  struct netlink_ext_ack *extack)
{
	struct tc_action_net *tn = net_generic(net, nat_net_id);

	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}

/*
 * tcf_nat_search - look up the NAT action with @index in @net through
 * tcf_idr_search(); its return value is passed through unchanged.
 */
static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index)
{
	struct tc_action_net *tn = net_generic(net, nat_net_id);

	return tcf_idr_search(tn, a, index);
}

/* Action operations registered for the "nat" action kind. */
static struct tc_action_ops act_nat_ops = {
	.kind		=	"nat",
	.id		=	TCA_ID_NAT,
	.owner		=	THIS_MODULE,
	.act		=	tcf_nat_act,
	.dump		=	tcf_nat_dump,
	.init		=	tcf_nat_init,
	.walk		=	tcf_nat_walker,
	.lookup		=	tcf_nat_search,
	.size		=	sizeof(struct tcf_nat),
};

/* Per-namespace init: set up this namespace's tc_action_net for NAT. */
static __net_init int nat_init_net(struct net *net)
{
	struct tc_action_net *tn = net_generic(net, nat_net_id);

	return tc_action_net_init(net, tn, &act_nat_ops);
}

/* Per-namespace batched exit: hand the namespace list to tc_action_net_exit(). */
static void __net_exit nat_exit_net(struct list_head *net_list)
{
	tc_action_net_exit(net_list, nat_net_id);
}

/* Pernet registration; .size requests one struct tc_action_net per namespace. */
static struct pernet_operations nat_net_ops = {
	.init = nat_init_net,
	.exit_batch = nat_exit_net,
	.id   = &nat_net_id,
	.size = sizeof(struct tc_action_net),
};

MODULE_DESCRIPTION("Stateless NAT actions");
MODULE_LICENSE("GPL");

/* Module load: register the action ops together with the pernet ops. */
static int __init nat_init_module(void)
{
	return tcf_register_action(&act_nat_ops, &nat_net_ops);
}

/* Module unload: undo the registration done in nat_init_module(). */
static void __exit nat_cleanup_module(void)
{
	tcf_unregister_action(&act_nat_ops, &nat_net_ops);
}

module_init(nat_init_module);
module_exit(nat_cleanup_module);