xref: /openbmc/linux/net/sched/act_nat.c (revision 5765e78e84023ced0c719aaea2ef59b9b34f626a)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Stateless NAT actions
4  *
5  * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au>
6  */
7 
8 #include <linux/errno.h>
9 #include <linux/init.h>
10 #include <linux/kernel.h>
11 #include <linux/module.h>
12 #include <linux/netfilter.h>
13 #include <linux/rtnetlink.h>
14 #include <linux/skbuff.h>
15 #include <linux/slab.h>
16 #include <linux/spinlock.h>
17 #include <linux/string.h>
18 #include <linux/tc_act/tc_nat.h>
19 #include <net/act_api.h>
20 #include <net/pkt_cls.h>
21 #include <net/icmp.h>
22 #include <net/ip.h>
23 #include <net/netlink.h>
24 #include <net/tc_act/tc_nat.h>
25 #include <net/tcp.h>
26 #include <net/udp.h>
27 
28 
29 static unsigned int nat_net_id;
30 static struct tc_action_ops act_nat_ops;
31 
32 static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
33 	[TCA_NAT_PARMS]	= { .len = sizeof(struct tc_nat) },
34 };
35 
36 static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
37 			struct tc_action **a, int ovr, int bind,
38 			bool rtnl_held,	struct tcf_proto *tp,
39 			struct netlink_ext_ack *extack)
40 {
41 	struct tc_action_net *tn = net_generic(net, nat_net_id);
42 	struct nlattr *tb[TCA_NAT_MAX + 1];
43 	struct tcf_chain *goto_ch = NULL;
44 	struct tc_nat *parm;
45 	int ret = 0, err;
46 	struct tcf_nat *p;
47 
48 	if (nla == NULL)
49 		return -EINVAL;
50 
51 	err = nla_parse_nested_deprecated(tb, TCA_NAT_MAX, nla, nat_policy,
52 					  NULL);
53 	if (err < 0)
54 		return err;
55 
56 	if (tb[TCA_NAT_PARMS] == NULL)
57 		return -EINVAL;
58 	parm = nla_data(tb[TCA_NAT_PARMS]);
59 
60 	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
61 	if (!err) {
62 		ret = tcf_idr_create(tn, parm->index, est, a,
63 				     &act_nat_ops, bind, false);
64 		if (ret) {
65 			tcf_idr_cleanup(tn, parm->index);
66 			return ret;
67 		}
68 		ret = ACT_P_CREATED;
69 	} else if (err > 0) {
70 		if (bind)
71 			return 0;
72 		if (!ovr) {
73 			tcf_idr_release(*a, bind);
74 			return -EEXIST;
75 		}
76 	} else {
77 		return err;
78 	}
79 	err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
80 	if (err < 0)
81 		goto release_idr;
82 	p = to_tcf_nat(*a);
83 
84 	spin_lock_bh(&p->tcf_lock);
85 	p->old_addr = parm->old_addr;
86 	p->new_addr = parm->new_addr;
87 	p->mask = parm->mask;
88 	p->flags = parm->flags;
89 
90 	goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
91 	spin_unlock_bh(&p->tcf_lock);
92 	if (goto_ch)
93 		tcf_chain_put_by_act(goto_ch);
94 
95 	if (ret == ACT_P_CREATED)
96 		tcf_idr_insert(tn, *a);
97 
98 	return ret;
99 release_idr:
100 	tcf_idr_release(*a, bind);
101 	return err;
102 }
103 
104 static int tcf_nat_act(struct sk_buff *skb, const struct tc_action *a,
105 		       struct tcf_result *res)
106 {
107 	struct tcf_nat *p = to_tcf_nat(a);
108 	struct iphdr *iph;
109 	__be32 old_addr;
110 	__be32 new_addr;
111 	__be32 mask;
112 	__be32 addr;
113 	int egress;
114 	int action;
115 	int ihl;
116 	int noff;
117 
118 	spin_lock(&p->tcf_lock);
119 
120 	tcf_lastuse_update(&p->tcf_tm);
121 	old_addr = p->old_addr;
122 	new_addr = p->new_addr;
123 	mask = p->mask;
124 	egress = p->flags & TCA_NAT_FLAG_EGRESS;
125 	action = p->tcf_action;
126 
127 	bstats_update(&p->tcf_bstats, skb);
128 
129 	spin_unlock(&p->tcf_lock);
130 
131 	if (unlikely(action == TC_ACT_SHOT))
132 		goto drop;
133 
134 	noff = skb_network_offset(skb);
135 	if (!pskb_may_pull(skb, sizeof(*iph) + noff))
136 		goto drop;
137 
138 	iph = ip_hdr(skb);
139 
140 	if (egress)
141 		addr = iph->saddr;
142 	else
143 		addr = iph->daddr;
144 
145 	if (!((old_addr ^ addr) & mask)) {
146 		if (skb_try_make_writable(skb, sizeof(*iph) + noff))
147 			goto drop;
148 
149 		new_addr &= mask;
150 		new_addr |= addr & ~mask;
151 
152 		/* Rewrite IP header */
153 		iph = ip_hdr(skb);
154 		if (egress)
155 			iph->saddr = new_addr;
156 		else
157 			iph->daddr = new_addr;
158 
159 		csum_replace4(&iph->check, addr, new_addr);
160 	} else if ((iph->frag_off & htons(IP_OFFSET)) ||
161 		   iph->protocol != IPPROTO_ICMP) {
162 		goto out;
163 	}
164 
165 	ihl = iph->ihl * 4;
166 
167 	/* It would be nice to share code with stateful NAT. */
168 	switch (iph->frag_off & htons(IP_OFFSET) ? 0 : iph->protocol) {
169 	case IPPROTO_TCP:
170 	{
171 		struct tcphdr *tcph;
172 
173 		if (!pskb_may_pull(skb, ihl + sizeof(*tcph) + noff) ||
174 		    skb_try_make_writable(skb, ihl + sizeof(*tcph) + noff))
175 			goto drop;
176 
177 		tcph = (void *)(skb_network_header(skb) + ihl);
178 		inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr,
179 					 true);
180 		break;
181 	}
182 	case IPPROTO_UDP:
183 	{
184 		struct udphdr *udph;
185 
186 		if (!pskb_may_pull(skb, ihl + sizeof(*udph) + noff) ||
187 		    skb_try_make_writable(skb, ihl + sizeof(*udph) + noff))
188 			goto drop;
189 
190 		udph = (void *)(skb_network_header(skb) + ihl);
191 		if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
192 			inet_proto_csum_replace4(&udph->check, skb, addr,
193 						 new_addr, true);
194 			if (!udph->check)
195 				udph->check = CSUM_MANGLED_0;
196 		}
197 		break;
198 	}
199 	case IPPROTO_ICMP:
200 	{
201 		struct icmphdr *icmph;
202 
203 		if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + noff))
204 			goto drop;
205 
206 		icmph = (void *)(skb_network_header(skb) + ihl);
207 
208 		if ((icmph->type != ICMP_DEST_UNREACH) &&
209 		    (icmph->type != ICMP_TIME_EXCEEDED) &&
210 		    (icmph->type != ICMP_PARAMETERPROB))
211 			break;
212 
213 		if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph) +
214 					noff))
215 			goto drop;
216 
217 		icmph = (void *)(skb_network_header(skb) + ihl);
218 		iph = (void *)(icmph + 1);
219 		if (egress)
220 			addr = iph->daddr;
221 		else
222 			addr = iph->saddr;
223 
224 		if ((old_addr ^ addr) & mask)
225 			break;
226 
227 		if (skb_try_make_writable(skb, ihl + sizeof(*icmph) +
228 					  sizeof(*iph) + noff))
229 			goto drop;
230 
231 		icmph = (void *)(skb_network_header(skb) + ihl);
232 		iph = (void *)(icmph + 1);
233 
234 		new_addr &= mask;
235 		new_addr |= addr & ~mask;
236 
237 		/* XXX Fix up the inner checksums. */
238 		if (egress)
239 			iph->daddr = new_addr;
240 		else
241 			iph->saddr = new_addr;
242 
243 		inet_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr,
244 					 false);
245 		break;
246 	}
247 	default:
248 		break;
249 	}
250 
251 out:
252 	return action;
253 
254 drop:
255 	spin_lock(&p->tcf_lock);
256 	p->tcf_qstats.drops++;
257 	spin_unlock(&p->tcf_lock);
258 	return TC_ACT_SHOT;
259 }
260 
261 static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a,
262 			int bind, int ref)
263 {
264 	unsigned char *b = skb_tail_pointer(skb);
265 	struct tcf_nat *p = to_tcf_nat(a);
266 	struct tc_nat opt = {
267 		.index    = p->tcf_index,
268 		.refcnt   = refcount_read(&p->tcf_refcnt) - ref,
269 		.bindcnt  = atomic_read(&p->tcf_bindcnt) - bind,
270 	};
271 	struct tcf_t t;
272 
273 	spin_lock_bh(&p->tcf_lock);
274 	opt.old_addr = p->old_addr;
275 	opt.new_addr = p->new_addr;
276 	opt.mask = p->mask;
277 	opt.flags = p->flags;
278 	opt.action = p->tcf_action;
279 
280 	if (nla_put(skb, TCA_NAT_PARMS, sizeof(opt), &opt))
281 		goto nla_put_failure;
282 
283 	tcf_tm_dump(&t, &p->tcf_tm);
284 	if (nla_put_64bit(skb, TCA_NAT_TM, sizeof(t), &t, TCA_NAT_PAD))
285 		goto nla_put_failure;
286 	spin_unlock_bh(&p->tcf_lock);
287 
288 	return skb->len;
289 
290 nla_put_failure:
291 	spin_unlock_bh(&p->tcf_lock);
292 	nlmsg_trim(skb, b);
293 	return -1;
294 }
295 
296 static int tcf_nat_walker(struct net *net, struct sk_buff *skb,
297 			  struct netlink_callback *cb, int type,
298 			  const struct tc_action_ops *ops,
299 			  struct netlink_ext_ack *extack)
300 {
301 	struct tc_action_net *tn = net_generic(net, nat_net_id);
302 
303 	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
304 }
305 
306 static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index)
307 {
308 	struct tc_action_net *tn = net_generic(net, nat_net_id);
309 
310 	return tcf_idr_search(tn, a, index);
311 }
312 
313 static struct tc_action_ops act_nat_ops = {
314 	.kind		=	"nat",
315 	.id		=	TCA_ID_NAT,
316 	.owner		=	THIS_MODULE,
317 	.act		=	tcf_nat_act,
318 	.dump		=	tcf_nat_dump,
319 	.init		=	tcf_nat_init,
320 	.walk		=	tcf_nat_walker,
321 	.lookup		=	tcf_nat_search,
322 	.size		=	sizeof(struct tcf_nat),
323 };
324 
325 static __net_init int nat_init_net(struct net *net)
326 {
327 	struct tc_action_net *tn = net_generic(net, nat_net_id);
328 
329 	return tc_action_net_init(tn, &act_nat_ops);
330 }
331 
/*
 * nat_exit_net - batched per-netns exit hook
 *
 * Passes the list of exiting namespaces and this module's netns id to
 * tc_action_net_exit().
 */
static void __net_exit nat_exit_net(struct list_head *net_list)
{
	tc_action_net_exit(net_list, nat_net_id);
}
336 
337 static struct pernet_operations nat_net_ops = {
338 	.init = nat_init_net,
339 	.exit_batch = nat_exit_net,
340 	.id   = &nat_net_id,
341 	.size = sizeof(struct tc_action_net),
342 };
343 
/* Module metadata. */
MODULE_DESCRIPTION("Stateless NAT actions");
MODULE_LICENSE("GPL");
346 
347 static int __init nat_init_module(void)
348 {
349 	return tcf_register_action(&act_nat_ops, &nat_net_ops);
350 }
351 
/*
 * nat_cleanup_module - module exit point
 *
 * Unregisters the same ops pair that nat_init_module() registered.
 */
static void __exit nat_cleanup_module(void)
{
	tcf_unregister_action(&act_nat_ops, &nat_net_ops);
}
356 
/* Hook the entry and exit points into the module loader. */
module_init(nat_init_module);
module_exit(nat_cleanup_module);
359