xref: /openbmc/linux/net/ipv4/netfilter/iptable_nat.c (revision c4c11dd1)
1 /* (C) 1999-2001 Paul `Rusty' Russell
2  * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
3  * (C) 2011 Patrick McHardy <kaber@trash.net>
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License version 2 as
7  * published by the Free Software Foundation.
8  */
9 
10 #include <linux/module.h>
11 #include <linux/netfilter.h>
12 #include <linux/netfilter_ipv4.h>
13 #include <linux/netfilter_ipv4/ip_tables.h>
14 #include <linux/ip.h>
15 #include <net/ip.h>
16 
17 #include <net/netfilter/nf_nat.h>
18 #include <net/netfilter/nf_nat_core.h>
19 #include <net/netfilter/nf_nat_l3proto.h>
20 
21 static const struct xt_table nf_nat_ipv4_table = {
22 	.name		= "nat",
23 	.valid_hooks	= (1 << NF_INET_PRE_ROUTING) |
24 			  (1 << NF_INET_POST_ROUTING) |
25 			  (1 << NF_INET_LOCAL_OUT) |
26 			  (1 << NF_INET_LOCAL_IN),
27 	.me		= THIS_MODULE,
28 	.af		= NFPROTO_IPV4,
29 };
30 
31 static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
32 {
33 	/* Force range to this IP; let proto decide mapping for
34 	 * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
35 	 */
36 	struct nf_nat_range range;
37 
38 	range.flags = 0;
39 	pr_debug("Allocating NULL binding for %p (%pI4)\n", ct,
40 		 HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ?
41 		 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip :
42 		 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip);
43 
44 	return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
45 }
46 
47 static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum,
48 				     const struct net_device *in,
49 				     const struct net_device *out,
50 				     struct nf_conn *ct)
51 {
52 	struct net *net = nf_ct_net(ct);
53 	unsigned int ret;
54 
55 	ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table);
56 	if (ret == NF_ACCEPT) {
57 		if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
58 			ret = alloc_null_binding(ct, hooknum);
59 	}
60 	return ret;
61 }
62 
63 static unsigned int
64 nf_nat_ipv4_fn(unsigned int hooknum,
65 	       struct sk_buff *skb,
66 	       const struct net_device *in,
67 	       const struct net_device *out,
68 	       int (*okfn)(struct sk_buff *))
69 {
70 	struct nf_conn *ct;
71 	enum ip_conntrack_info ctinfo;
72 	struct nf_conn_nat *nat;
73 	/* maniptype == SRC for postrouting. */
74 	enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
75 
76 	/* We never see fragments: conntrack defrags on pre-routing
77 	 * and local-out, and nf_nat_out protects post-routing.
78 	 */
79 	NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb)));
80 
81 	ct = nf_ct_get(skb, &ctinfo);
82 	/* Can't track?  It's not due to stress, or conntrack would
83 	 * have dropped it.  Hence it's the user's responsibilty to
84 	 * packet filter it out, or implement conntrack/NAT for that
85 	 * protocol. 8) --RR
86 	 */
87 	if (!ct)
88 		return NF_ACCEPT;
89 
90 	/* Don't try to NAT if this packet is not conntracked */
91 	if (nf_ct_is_untracked(ct))
92 		return NF_ACCEPT;
93 
94 	nat = nfct_nat(ct);
95 	if (!nat) {
96 		/* NAT module was loaded late. */
97 		if (nf_ct_is_confirmed(ct))
98 			return NF_ACCEPT;
99 		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
100 		if (nat == NULL) {
101 			pr_debug("failed to add NAT extension\n");
102 			return NF_ACCEPT;
103 		}
104 	}
105 
106 	switch (ctinfo) {
107 	case IP_CT_RELATED:
108 	case IP_CT_RELATED_REPLY:
109 		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
110 			if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
111 							   hooknum))
112 				return NF_DROP;
113 			else
114 				return NF_ACCEPT;
115 		}
116 		/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
117 	case IP_CT_NEW:
118 		/* Seen it before?  This can happen for loopback, retrans,
119 		 * or local packets.
120 		 */
121 		if (!nf_nat_initialized(ct, maniptype)) {
122 			unsigned int ret;
123 
124 			ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
125 			if (ret != NF_ACCEPT)
126 				return ret;
127 		} else {
128 			pr_debug("Already setup manip %s for ct %p\n",
129 				 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
130 				 ct);
131 			if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
132 				goto oif_changed;
133 		}
134 		break;
135 
136 	default:
137 		/* ESTABLISHED */
138 		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
139 			     ctinfo == IP_CT_ESTABLISHED_REPLY);
140 		if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
141 			goto oif_changed;
142 	}
143 
144 	return nf_nat_packet(ct, ctinfo, hooknum, skb);
145 
146 oif_changed:
147 	nf_ct_kill_acct(ct, ctinfo, skb);
148 	return NF_DROP;
149 }
150 
151 static unsigned int
152 nf_nat_ipv4_in(unsigned int hooknum,
153 	       struct sk_buff *skb,
154 	       const struct net_device *in,
155 	       const struct net_device *out,
156 	       int (*okfn)(struct sk_buff *))
157 {
158 	unsigned int ret;
159 	__be32 daddr = ip_hdr(skb)->daddr;
160 
161 	ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
162 	if (ret != NF_DROP && ret != NF_STOLEN &&
163 	    daddr != ip_hdr(skb)->daddr)
164 		skb_dst_drop(skb);
165 
166 	return ret;
167 }
168 
169 static unsigned int
170 nf_nat_ipv4_out(unsigned int hooknum,
171 		struct sk_buff *skb,
172 		const struct net_device *in,
173 		const struct net_device *out,
174 		int (*okfn)(struct sk_buff *))
175 {
176 #ifdef CONFIG_XFRM
177 	const struct nf_conn *ct;
178 	enum ip_conntrack_info ctinfo;
179 	int err;
180 #endif
181 	unsigned int ret;
182 
183 	/* root is playing with raw sockets. */
184 	if (skb->len < sizeof(struct iphdr) ||
185 	    ip_hdrlen(skb) < sizeof(struct iphdr))
186 		return NF_ACCEPT;
187 
188 	ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
189 #ifdef CONFIG_XFRM
190 	if (ret != NF_DROP && ret != NF_STOLEN &&
191 	    !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
192 	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
193 		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
194 
195 		if ((ct->tuplehash[dir].tuple.src.u3.ip !=
196 		     ct->tuplehash[!dir].tuple.dst.u3.ip) ||
197 		    (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
198 		     ct->tuplehash[dir].tuple.src.u.all !=
199 		     ct->tuplehash[!dir].tuple.dst.u.all)) {
200 			err = nf_xfrm_me_harder(skb, AF_INET);
201 			if (err < 0)
202 				ret = NF_DROP_ERR(err);
203 		}
204 	}
205 #endif
206 	return ret;
207 }
208 
209 static unsigned int
210 nf_nat_ipv4_local_fn(unsigned int hooknum,
211 		     struct sk_buff *skb,
212 		     const struct net_device *in,
213 		     const struct net_device *out,
214 		     int (*okfn)(struct sk_buff *))
215 {
216 	const struct nf_conn *ct;
217 	enum ip_conntrack_info ctinfo;
218 	unsigned int ret;
219 	int err;
220 
221 	/* root is playing with raw sockets. */
222 	if (skb->len < sizeof(struct iphdr) ||
223 	    ip_hdrlen(skb) < sizeof(struct iphdr))
224 		return NF_ACCEPT;
225 
226 	ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
227 	if (ret != NF_DROP && ret != NF_STOLEN &&
228 	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
229 		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
230 
231 		if (ct->tuplehash[dir].tuple.dst.u3.ip !=
232 		    ct->tuplehash[!dir].tuple.src.u3.ip) {
233 			err = ip_route_me_harder(skb, RTN_UNSPEC);
234 			if (err < 0)
235 				ret = NF_DROP_ERR(err);
236 		}
237 #ifdef CONFIG_XFRM
238 		else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
239 			 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
240 			 ct->tuplehash[dir].tuple.dst.u.all !=
241 			 ct->tuplehash[!dir].tuple.src.u.all) {
242 			err = nf_xfrm_me_harder(skb, AF_INET);
243 			if (err < 0)
244 				ret = NF_DROP_ERR(err);
245 		}
246 #endif
247 	}
248 	return ret;
249 }
250 
251 static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = {
252 	/* Before packet filtering, change destination */
253 	{
254 		.hook		= nf_nat_ipv4_in,
255 		.owner		= THIS_MODULE,
256 		.pf		= NFPROTO_IPV4,
257 		.hooknum	= NF_INET_PRE_ROUTING,
258 		.priority	= NF_IP_PRI_NAT_DST,
259 	},
260 	/* After packet filtering, change source */
261 	{
262 		.hook		= nf_nat_ipv4_out,
263 		.owner		= THIS_MODULE,
264 		.pf		= NFPROTO_IPV4,
265 		.hooknum	= NF_INET_POST_ROUTING,
266 		.priority	= NF_IP_PRI_NAT_SRC,
267 	},
268 	/* Before packet filtering, change destination */
269 	{
270 		.hook		= nf_nat_ipv4_local_fn,
271 		.owner		= THIS_MODULE,
272 		.pf		= NFPROTO_IPV4,
273 		.hooknum	= NF_INET_LOCAL_OUT,
274 		.priority	= NF_IP_PRI_NAT_DST,
275 	},
276 	/* After packet filtering, change source */
277 	{
278 		.hook		= nf_nat_ipv4_fn,
279 		.owner		= THIS_MODULE,
280 		.pf		= NFPROTO_IPV4,
281 		.hooknum	= NF_INET_LOCAL_IN,
282 		.priority	= NF_IP_PRI_NAT_SRC,
283 	},
284 };
285 
286 static int __net_init iptable_nat_net_init(struct net *net)
287 {
288 	struct ipt_replace *repl;
289 
290 	repl = ipt_alloc_initial_table(&nf_nat_ipv4_table);
291 	if (repl == NULL)
292 		return -ENOMEM;
293 	net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl);
294 	kfree(repl);
295 	return PTR_RET(net->ipv4.nat_table);
296 }
297 
298 static void __net_exit iptable_nat_net_exit(struct net *net)
299 {
300 	ipt_unregister_table(net, net->ipv4.nat_table);
301 }
302 
303 static struct pernet_operations iptable_nat_net_ops = {
304 	.init	= iptable_nat_net_init,
305 	.exit	= iptable_nat_net_exit,
306 };
307 
308 static int __init iptable_nat_init(void)
309 {
310 	int err;
311 
312 	err = register_pernet_subsys(&iptable_nat_net_ops);
313 	if (err < 0)
314 		goto err1;
315 
316 	err = nf_register_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops));
317 	if (err < 0)
318 		goto err2;
319 	return 0;
320 
321 err2:
322 	unregister_pernet_subsys(&iptable_nat_net_ops);
323 err1:
324 	return err;
325 }
326 
327 static void __exit iptable_nat_exit(void)
328 {
329 	nf_unregister_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops));
330 	unregister_pernet_subsys(&iptable_nat_net_ops);
331 }
332 
333 module_init(iptable_nat_init);
334 module_exit(iptable_nat_exit);
335 
336 MODULE_LICENSE("GPL");
337