xref: /openbmc/linux/net/ipv4/netfilter/iptable_nat.c (revision cf028200)
1 /* (C) 1999-2001 Paul `Rusty' Russell
2  * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
3  * (C) 2011 Patrick McHardy <kaber@trash.net>
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License version 2 as
7  * published by the Free Software Foundation.
8  */
9 
10 #include <linux/module.h>
11 #include <linux/netfilter.h>
12 #include <linux/netfilter_ipv4.h>
13 #include <linux/netfilter_ipv4/ip_tables.h>
14 #include <linux/ip.h>
15 #include <net/ip.h>
16 
17 #include <net/netfilter/nf_nat.h>
18 #include <net/netfilter/nf_nat_core.h>
19 #include <net/netfilter/nf_nat_l3proto.h>
20 
21 static const struct xt_table nf_nat_ipv4_table = {
22 	.name		= "nat",
23 	.valid_hooks	= (1 << NF_INET_PRE_ROUTING) |
24 			  (1 << NF_INET_POST_ROUTING) |
25 			  (1 << NF_INET_LOCAL_OUT) |
26 			  (1 << NF_INET_LOCAL_IN),
27 	.me		= THIS_MODULE,
28 	.af		= NFPROTO_IPV4,
29 };
30 
31 static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
32 {
33 	/* Force range to this IP; let proto decide mapping for
34 	 * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
35 	 */
36 	struct nf_nat_range range;
37 
38 	range.flags = 0;
39 	pr_debug("Allocating NULL binding for %p (%pI4)\n", ct,
40 		 HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ?
41 		 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip :
42 		 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip);
43 
44 	return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
45 }
46 
47 static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum,
48 				     const struct net_device *in,
49 				     const struct net_device *out,
50 				     struct nf_conn *ct)
51 {
52 	struct net *net = nf_ct_net(ct);
53 	unsigned int ret;
54 
55 	ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table);
56 	if (ret == NF_ACCEPT) {
57 		if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
58 			ret = alloc_null_binding(ct, hooknum);
59 	}
60 	return ret;
61 }
62 
63 static unsigned int
64 nf_nat_ipv4_fn(unsigned int hooknum,
65 	       struct sk_buff *skb,
66 	       const struct net_device *in,
67 	       const struct net_device *out,
68 	       int (*okfn)(struct sk_buff *))
69 {
70 	struct nf_conn *ct;
71 	enum ip_conntrack_info ctinfo;
72 	struct nf_conn_nat *nat;
73 	/* maniptype == SRC for postrouting. */
74 	enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
75 
76 	/* We never see fragments: conntrack defrags on pre-routing
77 	 * and local-out, and nf_nat_out protects post-routing.
78 	 */
79 	NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb)));
80 
81 	ct = nf_ct_get(skb, &ctinfo);
82 	/* Can't track?  It's not due to stress, or conntrack would
83 	 * have dropped it.  Hence it's the user's responsibilty to
84 	 * packet filter it out, or implement conntrack/NAT for that
85 	 * protocol. 8) --RR
86 	 */
87 	if (!ct)
88 		return NF_ACCEPT;
89 
90 	/* Don't try to NAT if this packet is not conntracked */
91 	if (nf_ct_is_untracked(ct))
92 		return NF_ACCEPT;
93 
94 	nat = nfct_nat(ct);
95 	if (!nat) {
96 		/* NAT module was loaded late. */
97 		if (nf_ct_is_confirmed(ct))
98 			return NF_ACCEPT;
99 		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
100 		if (nat == NULL) {
101 			pr_debug("failed to add NAT extension\n");
102 			return NF_ACCEPT;
103 		}
104 	}
105 
106 	switch (ctinfo) {
107 	case IP_CT_RELATED:
108 	case IP_CT_RELATED_REPLY:
109 		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
110 			if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
111 							   hooknum))
112 				return NF_DROP;
113 			else
114 				return NF_ACCEPT;
115 		}
116 		/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
117 	case IP_CT_NEW:
118 		/* Seen it before?  This can happen for loopback, retrans,
119 		 * or local packets.
120 		 */
121 		if (!nf_nat_initialized(ct, maniptype)) {
122 			unsigned int ret;
123 
124 			ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
125 			if (ret != NF_ACCEPT)
126 				return ret;
127 		} else
128 			pr_debug("Already setup manip %s for ct %p\n",
129 				 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
130 				 ct);
131 		break;
132 
133 	default:
134 		/* ESTABLISHED */
135 		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
136 			     ctinfo == IP_CT_ESTABLISHED_REPLY);
137 	}
138 
139 	return nf_nat_packet(ct, ctinfo, hooknum, skb);
140 }
141 
142 static unsigned int
143 nf_nat_ipv4_in(unsigned int hooknum,
144 	       struct sk_buff *skb,
145 	       const struct net_device *in,
146 	       const struct net_device *out,
147 	       int (*okfn)(struct sk_buff *))
148 {
149 	unsigned int ret;
150 	__be32 daddr = ip_hdr(skb)->daddr;
151 
152 	ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
153 	if (ret != NF_DROP && ret != NF_STOLEN &&
154 	    daddr != ip_hdr(skb)->daddr)
155 		skb_dst_drop(skb);
156 
157 	return ret;
158 }
159 
160 static unsigned int
161 nf_nat_ipv4_out(unsigned int hooknum,
162 		struct sk_buff *skb,
163 		const struct net_device *in,
164 		const struct net_device *out,
165 		int (*okfn)(struct sk_buff *))
166 {
167 #ifdef CONFIG_XFRM
168 	const struct nf_conn *ct;
169 	enum ip_conntrack_info ctinfo;
170 #endif
171 	unsigned int ret;
172 
173 	/* root is playing with raw sockets. */
174 	if (skb->len < sizeof(struct iphdr) ||
175 	    ip_hdrlen(skb) < sizeof(struct iphdr))
176 		return NF_ACCEPT;
177 
178 	ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
179 #ifdef CONFIG_XFRM
180 	if (ret != NF_DROP && ret != NF_STOLEN &&
181 	    !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
182 	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
183 		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
184 
185 		if ((ct->tuplehash[dir].tuple.src.u3.ip !=
186 		     ct->tuplehash[!dir].tuple.dst.u3.ip) ||
187 		    (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
188 		     ct->tuplehash[dir].tuple.src.u.all !=
189 		     ct->tuplehash[!dir].tuple.dst.u.all))
190 			if (nf_xfrm_me_harder(skb, AF_INET) < 0)
191 				ret = NF_DROP;
192 	}
193 #endif
194 	return ret;
195 }
196 
197 static unsigned int
198 nf_nat_ipv4_local_fn(unsigned int hooknum,
199 		     struct sk_buff *skb,
200 		     const struct net_device *in,
201 		     const struct net_device *out,
202 		     int (*okfn)(struct sk_buff *))
203 {
204 	const struct nf_conn *ct;
205 	enum ip_conntrack_info ctinfo;
206 	unsigned int ret;
207 
208 	/* root is playing with raw sockets. */
209 	if (skb->len < sizeof(struct iphdr) ||
210 	    ip_hdrlen(skb) < sizeof(struct iphdr))
211 		return NF_ACCEPT;
212 
213 	ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
214 	if (ret != NF_DROP && ret != NF_STOLEN &&
215 	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
216 		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
217 
218 		if (ct->tuplehash[dir].tuple.dst.u3.ip !=
219 		    ct->tuplehash[!dir].tuple.src.u3.ip) {
220 			if (ip_route_me_harder(skb, RTN_UNSPEC))
221 				ret = NF_DROP;
222 		}
223 #ifdef CONFIG_XFRM
224 		else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
225 			 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
226 			 ct->tuplehash[dir].tuple.dst.u.all !=
227 			 ct->tuplehash[!dir].tuple.src.u.all)
228 			if (nf_xfrm_me_harder(skb, AF_INET) < 0)
229 				ret = NF_DROP;
230 #endif
231 	}
232 	return ret;
233 }
234 
235 static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = {
236 	/* Before packet filtering, change destination */
237 	{
238 		.hook		= nf_nat_ipv4_in,
239 		.owner		= THIS_MODULE,
240 		.pf		= NFPROTO_IPV4,
241 		.hooknum	= NF_INET_PRE_ROUTING,
242 		.priority	= NF_IP_PRI_NAT_DST,
243 	},
244 	/* After packet filtering, change source */
245 	{
246 		.hook		= nf_nat_ipv4_out,
247 		.owner		= THIS_MODULE,
248 		.pf		= NFPROTO_IPV4,
249 		.hooknum	= NF_INET_POST_ROUTING,
250 		.priority	= NF_IP_PRI_NAT_SRC,
251 	},
252 	/* Before packet filtering, change destination */
253 	{
254 		.hook		= nf_nat_ipv4_local_fn,
255 		.owner		= THIS_MODULE,
256 		.pf		= NFPROTO_IPV4,
257 		.hooknum	= NF_INET_LOCAL_OUT,
258 		.priority	= NF_IP_PRI_NAT_DST,
259 	},
260 	/* After packet filtering, change source */
261 	{
262 		.hook		= nf_nat_ipv4_fn,
263 		.owner		= THIS_MODULE,
264 		.pf		= NFPROTO_IPV4,
265 		.hooknum	= NF_INET_LOCAL_IN,
266 		.priority	= NF_IP_PRI_NAT_SRC,
267 	},
268 };
269 
270 static int __net_init iptable_nat_net_init(struct net *net)
271 {
272 	struct ipt_replace *repl;
273 
274 	repl = ipt_alloc_initial_table(&nf_nat_ipv4_table);
275 	if (repl == NULL)
276 		return -ENOMEM;
277 	net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl);
278 	kfree(repl);
279 	if (IS_ERR(net->ipv4.nat_table))
280 		return PTR_ERR(net->ipv4.nat_table);
281 	return 0;
282 }
283 
284 static void __net_exit iptable_nat_net_exit(struct net *net)
285 {
286 	ipt_unregister_table(net, net->ipv4.nat_table);
287 }
288 
289 static struct pernet_operations iptable_nat_net_ops = {
290 	.init	= iptable_nat_net_init,
291 	.exit	= iptable_nat_net_exit,
292 };
293 
294 static int __init iptable_nat_init(void)
295 {
296 	int err;
297 
298 	err = register_pernet_subsys(&iptable_nat_net_ops);
299 	if (err < 0)
300 		goto err1;
301 
302 	err = nf_register_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops));
303 	if (err < 0)
304 		goto err2;
305 	return 0;
306 
307 err2:
308 	unregister_pernet_subsys(&iptable_nat_net_ops);
309 err1:
310 	return err;
311 }
312 
/*
 * Module exit point: tear down in the reverse order of iptable_nat_init(),
 * i.e. unregister the netfilter hooks first, then the per-netns operations.
 */
static void __exit iptable_nat_exit(void)
{
	nf_unregister_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops));
	unregister_pernet_subsys(&iptable_nat_net_ops);
}
318 
/* Register the module's load and unload entry points. */
module_init(iptable_nat_init);
module_exit(iptable_nat_exit);

/* License tag for the module. */
MODULE_LICENSE("GPL");
322 MODULE_LICENSE("GPL");
323