// SPDX-License-Identifier: GPL-2.0

#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/inetdevice.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>

#include <net/netfilter/nf_nat_masquerade.h>

static DEFINE_MUTEX(masq_mutex);
static unsigned int masq_refcnt4 __read_mostly;
static unsigned int masq_refcnt6 __read_mostly;

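/**
 * nf_nat_masquerade_ipv4 - set up source NAT to an address of @out
 * @skb:	packet being translated
 * @hooknum:	hook the packet traverses; must be NF_INET_POST_ROUTING
 * @range:	original NAT range; its flags and proto limits are preserved
 * @out:	output device supplying the new source address
 *
 * Picks a source address on @out toward the packet's next hop and hands
 * a single-address SNAT range to the generic NAT setup. Returns
 * NF_ACCEPT for 0.0.0.0 sources, NF_DROP if no usable address exists on
 * @out, otherwise the nf_nat_setup_info() verdict.
 */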
unsigned int
nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
		       const struct nf_nat_range2 *range,
		       const struct net_device *out)
{
	struct nf_conn *ct;
	struct nf_conn_nat *nat;
	enum ip_conntrack_info ctinfo;
	struct nf_nat_range2 newrange;
	const struct rtable *rt;
	__be32 newsrc, nh;

	WARN_ON(hooknum != NF_INET_POST_ROUTING);

	ct = nf_ct_get(skb, &ctinfo);

	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
			 ctinfo == IP_CT_RELATED_REPLY)));

	/* Source address is 0.0.0.0 - locally generated packet that is
	 * probably not supposed to be masqueraded.
	 */
	if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
		return NF_ACCEPT;

	rt = skb_rtable(skb);
	nh = rt_nexthop(rt, ip_hdr(skb)->daddr);
	newsrc = inet_select_addr(out, nh, RT_SCOPE_UNIVERSE);
	if (!newsrc) {
		pr_info("%s ate my IP address\n", out->name);
		return NF_DROP;
	}

	nat = nf_ct_nat_ext_add(ct);
	if (nat)
		nat->masq_index = out->ifindex;

	/* Transfer from original range. */
	memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
	memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
	newrange.flags       = range->flags | NF_NAT_RANGE_MAP_IPS;
	newrange.min_addr.ip = newsrc;
	newrange.max_addr.ip = newsrc;
	newrange.min_proto   = range->min_proto;
	newrange.max_proto   = range->max_proto;

	/* Hand modified range to generic setup. */
	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4);

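/* Example caller of nf_nat_masquerade_ipv4(): a minimal sketch of how a
 * MASQUERADE target could invoke the helper, loosely modeled on the
 * iptables MASQUERADE target. masquerade_tg(), xt_hooknum() and xt_out()
 * are illustrative here and not defined in this file:
 *
 *	static unsigned int
 *	masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
 *	{
 *		const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
 *		struct nf_nat_range2 range;
 *
 *		memset(&range, 0, sizeof(range));
 *		range.flags	= mr->range[0].flags;
 *		range.min_proto	= mr->range[0].min;
 *		range.max_proto	= mr->range[0].max;
 *
 *		return nf_nat_masquerade_ipv4(skb, xt_hooknum(par), &range,
 *					      xt_out(par));
 *	}
 */
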
static int device_cmp(struct nf_conn *i, void *ifindex)
{
	const struct nf_conn_nat *nat = nfct_nat(i);

	if (!nat)
		return 0;
	return nat->masq_index == (int)(long)ifindex;
}

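/* Netdevice and inetaddr notifiers run in process context, so the
 * conntrack table can be walked directly here; contrast this with the
 * atomic inet6addr notifier further below, which must defer the walk.
 */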
static int masq_device_event(struct notifier_block *this,
			     unsigned long event,
			     void *ptr)
{
	const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);

	if (event == NETDEV_DOWN) {
		/* Device was downed.  Search entire table for
		 * conntracks which were associated with that device,
		 * and forget them.
		 */

		nf_ct_iterate_cleanup_net(net, device_cmp,
					  (void *)(long)dev->ifindex, 0, 0);
	}

	return NOTIFY_DONE;
}

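/* Match conntrack entries that device_cmp() attributes to this device
 * and that were masqueraded to the IPv4 address being removed; the
 * reply tuple's destination holds the NATed source address.
 */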
static int inet_cmp(struct nf_conn *ct, void *ptr)
{
	struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
	struct net_device *dev = ifa->ifa_dev->dev;
	struct nf_conntrack_tuple *tuple;

	if (!device_cmp(ct, (void *)(long)dev->ifindex))
		return 0;

	tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;

	return ifa->ifa_address == tuple->dst.u3.ip;
}

static int masq_inet_event(struct notifier_block *this,
			   unsigned long event,
			   void *ptr)
{
	struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev;
	struct net *net = dev_net(idev->dev);

	/* The masq_dev_notifier will catch the case of the device going
	 * down.  So if the inetdev is dead and being destroyed we have
	 * no work to do.  Otherwise this is an individual address removal
	 * and we have to perform the flush.
	 */
	if (idev->dead)
		return NOTIFY_DONE;

	if (event == NETDEV_DOWN)
		nf_ct_iterate_cleanup_net(net, inet_cmp, ptr, 0, 0);

	return NOTIFY_DONE;
}

static struct notifier_block masq_dev_notifier = {
	.notifier_call	= masq_device_event,
};

static struct notifier_block masq_inet_notifier = {
	.notifier_call	= masq_inet_event,
};

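/* Notifier registration is reference counted under masq_mutex so that
 * several users (e.g. the iptables MASQUERADE target and nft masq) can
 * share a single set of notifiers.
 */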
int nf_nat_masquerade_ipv4_register_notifier(void)
{
	int ret = 0;

	mutex_lock(&masq_mutex);
	if (WARN_ON_ONCE(masq_refcnt4 == UINT_MAX)) {
		ret = -EOVERFLOW;
		goto out_unlock;
	}

	/* check if the notifier was already set */
	if (++masq_refcnt4 > 1)
		goto out_unlock;

	/* Register for device down reports */
	ret = register_netdevice_notifier(&masq_dev_notifier);
	if (ret)
		goto err_dec;
	/* Register IP address change reports */
	ret = register_inetaddr_notifier(&masq_inet_notifier);
	if (ret)
		goto err_unregister;

	mutex_unlock(&masq_mutex);
	return ret;

err_unregister:
	unregister_netdevice_notifier(&masq_dev_notifier);
err_dec:
	masq_refcnt4--;
out_unlock:
	mutex_unlock(&masq_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_register_notifier);

void nf_nat_masquerade_ipv4_unregister_notifier(void)
{
	mutex_lock(&masq_mutex);
	/* check if the notifier still has clients */
	if (--masq_refcnt4 > 0)
		goto out_unlock;

	unregister_netdevice_notifier(&masq_dev_notifier);
	unregister_inetaddr_notifier(&masq_inet_notifier);
out_unlock:
	mutex_unlock(&masq_mutex);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_unregister_notifier);
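
/* Example pairing: a minimal sketch of how a user of the IPv4 helper
 * would wire the notifiers into its module init/exit paths. The
 * masq_init()/masq_exit() names are illustrative, not part of this file:
 *
 *	static int __init masq_init(void)
 *	{
 *		return nf_nat_masquerade_ipv4_register_notifier();
 *	}
 *
 *	static void __exit masq_exit(void)
 *	{
 *		nf_nat_masquerade_ipv4_unregister_notifier();
 *	}
 */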

#if IS_ENABLED(CONFIG_IPV6)
static atomic_t v6_worker_count __read_mostly;

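/* With CONFIG_IPV6=m this (possibly built-in) code cannot call
 * ipv6_dev_get_saddr() directly, so source address selection goes
 * through the nf_ipv6_ops indirection, which is only populated once
 * the ipv6 module has loaded.
 */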
static int
nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
		       const struct in6_addr *daddr, unsigned int srcprefs,
		       struct in6_addr *saddr)
{
#ifdef CONFIG_IPV6_MODULE
	const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();

	if (!v6_ops)
		return -EHOSTUNREACH;

	return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr);
#else
	return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr);
#endif
}

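/**
 * nf_nat_masquerade_ipv6 - set up source NAT to an address of @out
 * @skb:	packet being translated
 * @range:	original NAT range; its flags and proto limits are preserved
 * @out:	output device supplying the new source address
 *
 * Selects a source address on @out for the packet's destination and
 * hands a single-address SNAT range to the generic NAT setup. Returns
 * NF_DROP if no address can be selected, otherwise the
 * nf_nat_setup_info() verdict.
 */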
unsigned int
nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
		       const struct net_device *out)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn_nat *nat;
	struct in6_addr src;
	struct nf_conn *ct;
	struct nf_nat_range2 newrange;

	ct = nf_ct_get(skb, &ctinfo);
	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
			 ctinfo == IP_CT_RELATED_REPLY)));

	if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out,
				   &ipv6_hdr(skb)->daddr, 0, &src) < 0)
		return NF_DROP;

	nat = nf_ct_nat_ext_add(ct);
	if (nat)
		nat->masq_index = out->ifindex;

	newrange.flags		= range->flags | NF_NAT_RANGE_MAP_IPS;
	newrange.min_addr.in6	= src;
	newrange.max_addr.in6	= src;
	newrange.min_proto	= range->min_proto;
	newrange.max_proto	= range->max_proto;

	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6);

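/* Example caller of nf_nat_masquerade_ipv6(): a sketch of an IPv6
 * MASQUERADE target function, analogous to the IPv4 example above.
 * masquerade_tg6() and xt_out() are illustrative here, not defined in
 * this file:
 *
 *	static unsigned int
 *	masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 *	{
 *		return nf_nat_masquerade_ipv6(skb, par->targinfo, xt_out(par));
 *	}
 */

/* Deferred cleanup context: IPv6 address removal is reported from an
 * atomic notifier, so the conntrack walk runs later from a work item
 * that pins the net namespace and records the deleted address and
 * interface index.
 */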
struct masq_dev_work {
	struct work_struct work;
	struct net *net;
	struct in6_addr addr;
	int ifindex;
};

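/* IPv6 counterpart of inet_cmp(); the address and ifindex to match
 * against travel in the work item.
 */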
static int inet6_cmp(struct nf_conn *ct, void *work)
{
	struct masq_dev_work *w = (struct masq_dev_work *)work;
	struct nf_conntrack_tuple *tuple;

	if (!device_cmp(ct, (void *)(long)w->ifindex))
		return 0;

	tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;

	return ipv6_addr_equal(&w->addr, &tuple->dst.u3.in6);
}

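/* Runs from the system workqueue; performs the conntrack walk that the
 * atomic notifier could not, then releases the references taken in
 * masq_inet6_event().
 */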
static void iterate_cleanup_work(struct work_struct *work)
{
	struct masq_dev_work *w;

	w = container_of(work, struct masq_dev_work, work);

	nf_ct_iterate_cleanup_net(w->net, inet6_cmp, (void *)w, 0, 0);

	put_net(w->net);
	kfree(w);
	atomic_dec(&v6_worker_count);
	module_put(THIS_MODULE);
}

/* Atomic notifier; we can't call nf_ct_iterate_cleanup_net() here
 * because it can sleep.
 *
 * Defer the cleanup to the system workqueue.
 *
 * As we can have 'a lot' of inet6 events (depending on the number of
 * IPv6 addresses being deleted), we also need to limit the work item
 * queue.
 */
static int masq_inet6_event(struct notifier_block *this,
			    unsigned long event, void *ptr)
{
	struct inet6_ifaddr *ifa = ptr;
	const struct net_device *dev;
	struct masq_dev_work *w;
	struct net *net;

	if (event != NETDEV_DOWN || atomic_read(&v6_worker_count) >= 16)
		return NOTIFY_DONE;

	dev = ifa->idev->dev;
	net = maybe_get_net(dev_net(dev));
	if (!net)
		return NOTIFY_DONE;

	if (!try_module_get(THIS_MODULE))
		goto err_module;

	w = kmalloc(sizeof(*w), GFP_ATOMIC);
	if (w) {
		atomic_inc(&v6_worker_count);

		INIT_WORK(&w->work, iterate_cleanup_work);
		w->ifindex = dev->ifindex;
		w->net = net;
		w->addr = ifa->addr;
		schedule_work(&w->work);

		return NOTIFY_DONE;
	}

	module_put(THIS_MODULE);
 err_module:
	put_net(net);
	return NOTIFY_DONE;
}

static struct notifier_block masq_inet6_notifier = {
	.notifier_call	= masq_inet6_event,
};

int nf_nat_masquerade_ipv6_register_notifier(void)
{
	int ret = 0;

	mutex_lock(&masq_mutex);
	if (WARN_ON_ONCE(masq_refcnt6 == UINT_MAX)) {
		ret = -EOVERFLOW;
		goto out_unlock;
	}

	/* check if the notifier was already set */
	if (++masq_refcnt6 > 1)
		goto out_unlock;

	ret = register_inet6addr_notifier(&masq_inet6_notifier);
	if (ret)
		goto err_dec;

	mutex_unlock(&masq_mutex);
	return ret;
err_dec:
	masq_refcnt6--;
out_unlock:
	mutex_unlock(&masq_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_register_notifier);

void nf_nat_masquerade_ipv6_unregister_notifier(void)
{
	mutex_lock(&masq_mutex);
	/* check if the notifier still has clients */
	if (--masq_refcnt6 > 0)
		goto out_unlock;

	unregister_inet6addr_notifier(&masq_inet6_notifier);
out_unlock:
	mutex_unlock(&masq_mutex);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_unregister_notifier);
#endif