// SPDX-License-Identifier: GPL-2.0

#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/inetdevice.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>

#include <net/netfilter/ipv4/nf_nat_masquerade.h>
#include <net/netfilter/ipv6/nf_nat_masquerade.h>

static DEFINE_MUTEX(masq_mutex);
static unsigned int masq_refcnt4 __read_mostly;
static unsigned int masq_refcnt6 __read_mostly;

unsigned int
nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
		       const struct nf_nat_range2 *range,
		       const struct net_device *out)
{
	struct nf_conn *ct;
	struct nf_conn_nat *nat;
	enum ip_conntrack_info ctinfo;
	struct nf_nat_range2 newrange;
	const struct rtable *rt;
	__be32 newsrc, nh;

	WARN_ON(hooknum != NF_INET_POST_ROUTING);

	ct = nf_ct_get(skb, &ctinfo);

	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
			 ctinfo == IP_CT_RELATED_REPLY)));

	/* Source address is 0.0.0.0 - locally generated packet that is
	 * probably not supposed to be masqueraded.
	 */
	if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
		return NF_ACCEPT;

	rt = skb_rtable(skb);
	nh = rt_nexthop(rt, ip_hdr(skb)->daddr);
	newsrc = inet_select_addr(out, nh, RT_SCOPE_UNIVERSE);
	if (!newsrc) {
		pr_info("%s ate my IP address\n", out->name);
		return NF_DROP;
	}

	nat = nf_ct_nat_ext_add(ct);
	if (nat)
		nat->masq_index = out->ifindex;

	/* Transfer from original range. */
	memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
	memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
	newrange.flags       = range->flags | NF_NAT_RANGE_MAP_IPS;
	newrange.min_addr.ip = newsrc;
	newrange.max_addr.ip = newsrc;
	newrange.min_proto   = range->min_proto;
	newrange.max_proto   = range->max_proto;

	/* Hand modified range to generic setup. */
	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4);
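/* Usage sketch (illustrative only, not part of this file): a masquerade
 * target or expression is expected to invoke the helper above from its
 * NF_INET_POST_ROUTING hook, along these lines.  masq_tg_eval() is a
 * hypothetical caller; real users such as xt_MASQUERADE and nft_masq
 * also fill in the proto range from their userspace configuration:
 *
 *	static unsigned int
 *	masq_tg_eval(struct sk_buff *skb, const struct net_device *out)
 *	{
 *		struct nf_nat_range2 range = {};
 *
 *		return nf_nat_masquerade_ipv4(skb, NF_INET_POST_ROUTING,
 *					      &range, out);
 *	}
 */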
static int device_cmp(struct nf_conn *i, void *ifindex)
{
	const struct nf_conn_nat *nat = nfct_nat(i);

	if (!nat)
		return 0;
	return nat->masq_index == (int)(long)ifindex;
}

static int masq_device_event(struct notifier_block *this,
			     unsigned long event,
			     void *ptr)
{
	const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);

	if (event == NETDEV_DOWN) {
		/* Device was downed.  Search entire table for
		 * conntracks which were associated with that device,
		 * and forget them.
		 */

		nf_ct_iterate_cleanup_net(net, device_cmp,
					  (void *)(long)dev->ifindex, 0, 0);
	}

	return NOTIFY_DONE;
}

static int inet_cmp(struct nf_conn *ct, void *ptr)
{
	struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
	struct net_device *dev = ifa->ifa_dev->dev;
	struct nf_conntrack_tuple *tuple;

	if (!device_cmp(ct, (void *)(long)dev->ifindex))
		return 0;

	tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;

	return ifa->ifa_address == tuple->dst.u3.ip;
}

static int masq_inet_event(struct notifier_block *this,
			   unsigned long event,
			   void *ptr)
{
	struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev;
	struct net *net = dev_net(idev->dev);

	/* The masq_dev_notifier will catch the case of the device going
	 * down.  So if the inetdev is dead and being destroyed we have
	 * no work to do.  Otherwise this is an individual address removal
	 * and we have to perform the flush.
	 */
	if (idev->dead)
		return NOTIFY_DONE;

	if (event == NETDEV_DOWN)
		nf_ct_iterate_cleanup_net(net, inet_cmp, ptr, 0, 0);

	return NOTIFY_DONE;
}

static struct notifier_block masq_dev_notifier = {
	.notifier_call	= masq_device_event,
};

static struct notifier_block masq_inet_notifier = {
	.notifier_call	= masq_inet_event,
};

int nf_nat_masquerade_ipv4_register_notifier(void)
{
	int ret = 0;

	mutex_lock(&masq_mutex);
	if (WARN_ON_ONCE(masq_refcnt4 == UINT_MAX)) {
		ret = -EOVERFLOW;
		goto out_unlock;
	}

	/* check if the notifier was already set */
	if (++masq_refcnt4 > 1)
		goto out_unlock;

	/* Register for device down reports */
	ret = register_netdevice_notifier(&masq_dev_notifier);
	if (ret)
		goto err_dec;
	/* Register IP address change reports */
	ret = register_inetaddr_notifier(&masq_inet_notifier);
	if (ret)
		goto err_unregister;

	mutex_unlock(&masq_mutex);
	return ret;

err_unregister:
	unregister_netdevice_notifier(&masq_dev_notifier);
err_dec:
	masq_refcnt4--;
out_unlock:
	mutex_unlock(&masq_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_register_notifier);

void nf_nat_masquerade_ipv4_unregister_notifier(void)
{
	mutex_lock(&masq_mutex);
	/* check if the notifier still has clients */
	if (--masq_refcnt4 > 0)
		goto out_unlock;

	unregister_netdevice_notifier(&masq_dev_notifier);
	unregister_inetaddr_notifier(&masq_inet_notifier);
out_unlock:
	mutex_unlock(&masq_mutex);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_unregister_notifier);
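/* Lifetime sketch (illustrative only, not part of this file): a
 * hypothetical module using the IPv4 helper would bracket its lifetime
 * with the refcounted register/unregister pair above, e.g.:
 *
 *	static int __init masq_user_init(void)
 *	{
 *		return nf_nat_masquerade_ipv4_register_notifier();
 *	}
 *
 *	static void __exit masq_user_exit(void)
 *	{
 *		nf_nat_masquerade_ipv4_unregister_notifier();
 *	}
 *
 * The refcount under masq_mutex lets several users (e.g. iptables and
 * nftables masquerading) share a single set of notifiers.
 */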
#if IS_ENABLED(CONFIG_IPV6)
static atomic_t v6_worker_count __read_mostly;

static int
nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
		       const struct in6_addr *daddr, unsigned int srcprefs,
		       struct in6_addr *saddr)
{
#ifdef CONFIG_IPV6_MODULE
	const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();

	if (!v6_ops)
		return -EHOSTUNREACH;

	return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr);
#else
	return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr);
#endif
}

unsigned int
nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
		       const struct net_device *out)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn_nat *nat;
	struct in6_addr src;
	struct nf_conn *ct;
	struct nf_nat_range2 newrange;

	ct = nf_ct_get(skb, &ctinfo);
	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
			 ctinfo == IP_CT_RELATED_REPLY)));

	if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out,
				   &ipv6_hdr(skb)->daddr, 0, &src) < 0)
		return NF_DROP;

	nat = nf_ct_nat_ext_add(ct);
	if (nat)
		nat->masq_index = out->ifindex;

	newrange.flags		= range->flags | NF_NAT_RANGE_MAP_IPS;
	newrange.min_addr.in6	= src;
	newrange.max_addr.in6	= src;
	newrange.min_proto	= range->min_proto;
	newrange.max_proto	= range->max_proto;

	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6);

struct masq_dev_work {
	struct work_struct work;
	struct net *net;
	struct in6_addr addr;
	int ifindex;
};

static int inet6_cmp(struct nf_conn *ct, void *work)
{
	struct masq_dev_work *w = (struct masq_dev_work *)work;
	struct nf_conntrack_tuple *tuple;

	if (!device_cmp(ct, (void *)(long)w->ifindex))
		return 0;

	tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;

	return ipv6_addr_equal(&w->addr, &tuple->dst.u3.in6);
}

static void iterate_cleanup_work(struct work_struct *work)
{
	struct masq_dev_work *w;

	w = container_of(work, struct masq_dev_work, work);

	nf_ct_iterate_cleanup_net(w->net, inet6_cmp, (void *)w, 0, 0);

	put_net(w->net);
	kfree(w);
	atomic_dec(&v6_worker_count);
	module_put(THIS_MODULE);
}

/* atomic notifier; can't call nf_ct_iterate_cleanup_net (it can sleep).
 *
 * Defer it to the system workqueue.
 *
 * As we can have 'a lot' of inet_events (depending on amount of ipv6
 * addresses being deleted), we also need to limit work item queue.
 */
static int masq_inet6_event(struct notifier_block *this,
			    unsigned long event, void *ptr)
{
	struct inet6_ifaddr *ifa = ptr;
	const struct net_device *dev;
	struct masq_dev_work *w;
	struct net *net;

	if (event != NETDEV_DOWN || atomic_read(&v6_worker_count) >= 16)
		return NOTIFY_DONE;

	dev = ifa->idev->dev;
	net = maybe_get_net(dev_net(dev));
	if (!net)
		return NOTIFY_DONE;

	if (!try_module_get(THIS_MODULE))
		goto err_module;

	w = kmalloc(sizeof(*w), GFP_ATOMIC);
	if (w) {
		atomic_inc(&v6_worker_count);

		INIT_WORK(&w->work, iterate_cleanup_work);
		w->ifindex = dev->ifindex;
		w->net = net;
		w->addr = ifa->addr;
		schedule_work(&w->work);

		return NOTIFY_DONE;
	}

	module_put(THIS_MODULE);
err_module:
	put_net(net);
	return NOTIFY_DONE;
}

static struct notifier_block masq_inet6_notifier = {
	.notifier_call	= masq_inet6_event,
};

int nf_nat_masquerade_ipv6_register_notifier(void)
{
	int ret = 0;

	mutex_lock(&masq_mutex);
	if (WARN_ON_ONCE(masq_refcnt6 == UINT_MAX)) {
		ret = -EOVERFLOW;
		goto out_unlock;
	}

	/* check if the notifier is already set */
	if (++masq_refcnt6 > 1)
		goto out_unlock;

	ret = register_inet6addr_notifier(&masq_inet6_notifier);
	if (ret)
		goto err_dec;

	mutex_unlock(&masq_mutex);
	return ret;
err_dec:
	masq_refcnt6--;
out_unlock:
	mutex_unlock(&masq_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_register_notifier);

void nf_nat_masquerade_ipv6_unregister_notifier(void)
{
	mutex_lock(&masq_mutex);
	/* check if the notifier still has clients */
	if (--masq_refcnt6 > 0)
		goto out_unlock;

	unregister_inet6addr_notifier(&masq_inet6_notifier);
out_unlock:
	mutex_unlock(&masq_mutex);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_unregister_notifier);
#endif
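/* Dual-stack sketch (illustrative only, not part of this file): a user
 * that masquerades both families, such as nft_masq, would register both
 * notifier sets and unwind on failure, e.g.:
 *
 *	err = nf_nat_masquerade_ipv4_register_notifier();
 *	if (err)
 *		return err;
 *
 *	err = nf_nat_masquerade_ipv6_register_notifier();
 *	if (err)
 *		nf_nat_masquerade_ipv4_unregister_notifier();
 *
 *	return err;
 */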