// SPDX-License-Identifier: GPL-2.0

#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/inetdevice.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>

#include <net/netfilter/ipv4/nf_nat_masquerade.h>
#include <net/netfilter/ipv6/nf_nat_masquerade.h>

static DEFINE_MUTEX(masq_mutex);
static unsigned int masq_refcnt4 __read_mostly;
static unsigned int masq_refcnt6 __read_mostly;

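/**
 * nf_nat_masquerade_ipv4 - set up source NAT to an address of @out
 * @skb: packet to masquerade
 * @hooknum: hook the packet traverses; must be NF_INET_POST_ROUTING
 * @range: proto range (and flags) taken over from the original rule
 * @out: output device whose address becomes the new source
 *
 * Selects a routable source address on @out for the packet's nexthop
 * and hands the modified range to nf_nat_setup_info() to bind the
 * conntrack entry to that address.
 *
 * A minimal caller sketch, assuming an xtables MASQUERADE-style target
 * (masq_tg() stands in for such a caller and is illustrative, not
 * defined in this file):
 *
 *	static unsigned int
 *	masq_tg(struct sk_buff *skb, const struct xt_action_param *par)
 *	{
 *		struct nf_nat_range2 range = {};
 *
 *		return nf_nat_masquerade_ipv4(skb, xt_hooknum(par),
 *					      &range, xt_out(par));
 *	}
 */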
unsigned int
nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
		       const struct nf_nat_range2 *range,
		       const struct net_device *out)
{
	struct nf_conn *ct;
	struct nf_conn_nat *nat;
	enum ip_conntrack_info ctinfo;
	struct nf_nat_range2 newrange;
	const struct rtable *rt;
	__be32 newsrc, nh;

	WARN_ON(hooknum != NF_INET_POST_ROUTING);

	ct = nf_ct_get(skb, &ctinfo);

	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
			 ctinfo == IP_CT_RELATED_REPLY)));

	/* Source address is 0.0.0.0 - locally generated packet that is
	 * probably not supposed to be masqueraded.
	 */
	if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
		return NF_ACCEPT;

	rt = skb_rtable(skb);
	nh = rt_nexthop(rt, ip_hdr(skb)->daddr);
	newsrc = inet_select_addr(out, nh, RT_SCOPE_UNIVERSE);
	if (!newsrc) {
		pr_info("%s ate my IP address\n", out->name);
		return NF_DROP;
	}

	nat = nf_ct_nat_ext_add(ct);
	if (nat)
		nat->masq_index = out->ifindex;

	/* Transfer from original range. */
	memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
	memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
	newrange.flags       = range->flags | NF_NAT_RANGE_MAP_IPS;
	newrange.min_addr.ip = newsrc;
	newrange.max_addr.ip = newsrc;
	newrange.min_proto   = range->min_proto;
	newrange.max_proto   = range->max_proto;

	/* Hand modified range to generic setup. */
	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4);

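/* Match conntrack entries whose NAT extension recorded @ifindex as the
 * masqueraded output device; used as an iterator callback for
 * nf_ct_iterate_cleanup_net().
 */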
static int device_cmp(struct nf_conn *i, void *ifindex)
{
	const struct nf_conn_nat *nat = nfct_nat(i);

	if (!nat)
		return 0;
	return nat->masq_index == (int)(long)ifindex;
}

static int masq_device_event(struct notifier_block *this,
			     unsigned long event,
			     void *ptr)
{
	const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);

	if (event == NETDEV_DOWN) {
		/* The device went down: search the entire conntrack
		 * table for entries that were associated with it, and
		 * forget them.
		 */

		nf_ct_iterate_cleanup_net(net, device_cmp,
					  (void *)(long)dev->ifindex, 0, 0);
	}

	return NOTIFY_DONE;
}

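/* Match conntrack entries that masqueraded via the device in the
 * notification and whose reply destination is the IPv4 address now
 * being removed.
 */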
static int inet_cmp(struct nf_conn *ct, void *ptr)
{
	struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
	struct net_device *dev = ifa->ifa_dev->dev;
	struct nf_conntrack_tuple *tuple;

	if (!device_cmp(ct, (void *)(long)dev->ifindex))
		return 0;

	tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;

	return ifa->ifa_address == tuple->dst.u3.ip;
}

static int masq_inet_event(struct notifier_block *this,
			   unsigned long event,
			   void *ptr)
{
	struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev;
	struct net *net = dev_net(idev->dev);

	/* The masq_dev_notifier will catch the case of the device going
	 * down.  So if the inetdev is dead and being destroyed we have
	 * no work to do.  Otherwise this is an individual address removal
	 * and we have to perform the flush.
	 */
	if (idev->dead)
		return NOTIFY_DONE;

	if (event == NETDEV_DOWN)
		nf_ct_iterate_cleanup_net(net, inet_cmp, ptr, 0, 0);

	return NOTIFY_DONE;
}

static struct notifier_block masq_dev_notifier = {
	.notifier_call	= masq_device_event,
};

static struct notifier_block masq_inet_notifier = {
	.notifier_call	= masq_inet_event,
};

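/* Registration is reference counted: several users (e.g. the iptables
 * MASQUERADE target and the nftables masq expression) share one set of
 * notifiers, and only the first caller actually registers them.
 */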
int nf_nat_masquerade_ipv4_register_notifier(void)
{
	int ret = 0;

	mutex_lock(&masq_mutex);
	if (WARN_ON_ONCE(masq_refcnt4 == UINT_MAX)) {
		ret = -EOVERFLOW;
		goto out_unlock;
	}

	/* check if the notifier was already set */
	if (++masq_refcnt4 > 1)
		goto out_unlock;

	/* Register for device down reports */
	ret = register_netdevice_notifier(&masq_dev_notifier);
	if (ret)
		goto err_dec;
	/* Register IP address change reports */
	ret = register_inetaddr_notifier(&masq_inet_notifier);
	if (ret)
		goto err_unregister;

	mutex_unlock(&masq_mutex);
	return ret;

err_unregister:
	unregister_netdevice_notifier(&masq_dev_notifier);
err_dec:
	masq_refcnt4--;
out_unlock:
	mutex_unlock(&masq_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_register_notifier);

void nf_nat_masquerade_ipv4_unregister_notifier(void)
{
	mutex_lock(&masq_mutex);
	/* check if the notifier still has clients */
	if (--masq_refcnt4 > 0)
		goto out_unlock;

	unregister_netdevice_notifier(&masq_dev_notifier);
	unregister_inetaddr_notifier(&masq_inet_notifier);
out_unlock:
	mutex_unlock(&masq_mutex);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_unregister_notifier);
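
/* A minimal pairing sketch, assuming a hypothetical target module that
 * needs the IPv4 notifiers (all other init/exit steps elided):
 *
 *	static int __init masq_tg_init(void)
 *	{
 *		return nf_nat_masquerade_ipv4_register_notifier();
 *	}
 *
 *	static void __exit masq_tg_exit(void)
 *	{
 *		nf_nat_masquerade_ipv4_unregister_notifier();
 *	}
 */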

#if IS_ENABLED(CONFIG_IPV6)
static atomic_t v6_worker_count __read_mostly;

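/* IPv6 can be built as a module, in which case the source address
 * lookup must go through the nf_ipv6_ops indirection; with IPv6
 * built in, ipv6_dev_get_saddr() is called directly.
 */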
static int
nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
		       const struct in6_addr *daddr, unsigned int srcprefs,
		       struct in6_addr *saddr)
{
#ifdef CONFIG_IPV6_MODULE
	const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();

	if (!v6_ops)
		return -EHOSTUNREACH;

	return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr);
#else
	return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr);
#endif
}

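/**
 * nf_nat_masquerade_ipv6 - set up source NAT to an address of @out
 * @skb: packet to masquerade
 * @range: proto range (and flags) taken over from the original rule
 * @out: output device supplying the new source address
 *
 * IPv6 counterpart of nf_nat_masquerade_ipv4(). Note there is no
 * hooknum argument; a POST_ROUTING caller would invoke it as, roughly,
 * nf_nat_masquerade_ipv6(skb, &range, xt_out(par)) (illustrative,
 * modeled on an ip6tables MASQUERADE-style target).
 */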
unsigned int
nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
		       const struct net_device *out)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn_nat *nat;
	struct in6_addr src;
	struct nf_conn *ct;
	struct nf_nat_range2 newrange;

	ct = nf_ct_get(skb, &ctinfo);
	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
			 ctinfo == IP_CT_RELATED_REPLY)));

	if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out,
				   &ipv6_hdr(skb)->daddr, 0, &src) < 0)
		return NF_DROP;

	nat = nf_ct_nat_ext_add(ct);
	if (nat)
		nat->masq_index = out->ifindex;

	newrange.flags		= range->flags | NF_NAT_RANGE_MAP_IPS;
	newrange.min_addr.in6	= src;
	newrange.max_addr.in6	= src;
	newrange.min_proto	= range->min_proto;
	newrange.max_proto	= range->max_proto;

	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6);

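/* Deferred flush state: one work item per IPv6 address removal,
 * carrying everything inet6_cmp() needs once iterate_cleanup_work()
 * runs in process context.
 */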
struct masq_dev_work {
	struct work_struct work;
	struct net *net;
	struct in6_addr addr;
	int ifindex;
};

static int inet6_cmp(struct nf_conn *ct, void *work)
{
	struct masq_dev_work *w = (struct masq_dev_work *)work;
	struct nf_conntrack_tuple *tuple;

	if (!device_cmp(ct, (void *)(long)w->ifindex))
		return 0;

	tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;

	return ipv6_addr_equal(&w->addr, &tuple->dst.u3.in6);
}

static void iterate_cleanup_work(struct work_struct *work)
{
	struct masq_dev_work *w;

	w = container_of(work, struct masq_dev_work, work);

	nf_ct_iterate_cleanup_net(w->net, inet6_cmp, (void *)w, 0, 0);

	put_net(w->net);
	kfree(w);
	atomic_dec(&v6_worker_count);
	module_put(THIS_MODULE);
}

/* The inet6addr notifier chain is atomic, so we can't call
 * nf_ct_iterate_cleanup_net() from it directly (it can sleep).
 *
 * Defer the flush to the system workqueue instead.
 *
 * As deleting many IPv6 addresses can generate a burst of inet6
 * events, we also need to cap the number of queued work items.
 */
static int masq_inet6_event(struct notifier_block *this,
			    unsigned long event, void *ptr)
{
	struct inet6_ifaddr *ifa = ptr;
	const struct net_device *dev;
	struct masq_dev_work *w;
	struct net *net;

	if (event != NETDEV_DOWN || atomic_read(&v6_worker_count) >= 16)
		return NOTIFY_DONE;

	dev = ifa->idev->dev;
	net = maybe_get_net(dev_net(dev));
	if (!net)
		return NOTIFY_DONE;

	if (!try_module_get(THIS_MODULE))
		goto err_module;

	w = kmalloc(sizeof(*w), GFP_ATOMIC);
	if (w) {
		atomic_inc(&v6_worker_count);

		INIT_WORK(&w->work, iterate_cleanup_work);
		w->ifindex = dev->ifindex;
		w->net = net;
		w->addr = ifa->addr;
		schedule_work(&w->work);

		return NOTIFY_DONE;
	}

	module_put(THIS_MODULE);
 err_module:
	put_net(net);
	return NOTIFY_DONE;
}

static struct notifier_block masq_inet6_notifier = {
	.notifier_call	= masq_inet6_event,
};

int nf_nat_masquerade_ipv6_register_notifier(void)
{
	int ret = 0;

	mutex_lock(&masq_mutex);
	if (WARN_ON_ONCE(masq_refcnt6 == UINT_MAX)) {
		ret = -EOVERFLOW;
		goto out_unlock;
	}

	/* check if the notifier is already set */
	if (++masq_refcnt6 > 1)
		goto out_unlock;

	ret = register_inet6addr_notifier(&masq_inet6_notifier);
	if (ret)
		goto err_dec;

	mutex_unlock(&masq_mutex);
	return ret;
err_dec:
	masq_refcnt6--;
out_unlock:
	mutex_unlock(&masq_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_register_notifier);

void nf_nat_masquerade_ipv6_unregister_notifier(void)
{
	mutex_lock(&masq_mutex);
	/* check if the notifier still has clients */
	if (--masq_refcnt6 > 0)
		goto out_unlock;

	unregister_inet6addr_notifier(&masq_inet6_notifier);
out_unlock:
	mutex_unlock(&masq_mutex);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_unregister_notifier);
#endif