// SPDX-License-Identifier: GPL-2.0

#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/inetdevice.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>

#include <net/netfilter/nf_nat_masquerade.h>

/* Serializes notifier (un)registration and the refcounts below. */
static DEFINE_MUTEX(masq_mutex);

/* Per-family user counts for the notifiers; the notifiers themselves are
 * registered only on the 0 -> 1 transition and unregistered on 1 -> 0.
 */
static unsigned int masq_refcnt4 __read_mostly;
static unsigned int masq_refcnt6 __read_mostly;

/* Rewrite the source address of @skb's connection to the primary address of
 * the output device @out, recording out->ifindex in the conntrack NAT
 * extension so the mapping can be flushed when the device goes away.
 * Returns an NF_* verdict (NF_ACCEPT, NF_DROP, or the result of
 * nf_nat_setup_info()).
 */
unsigned int
nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
		       const struct nf_nat_range2 *range,
		       const struct net_device *out)
{
	struct nf_conn *ct;
	struct nf_conn_nat *nat;
	enum ip_conntrack_info ctinfo;
	struct nf_nat_range2 newrange;
	const struct rtable *rt;
	__be32 newsrc, nh;

	/* Masquerade is only meaningful at POST_ROUTING. */
	WARN_ON(hooknum != NF_INET_POST_ROUTING);

	ct = nf_ct_get(skb, &ctinfo);

	/* NAT setup must happen on the first packet of a connection. */
	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
			 ctinfo == IP_CT_RELATED_REPLY)));

	/* Source address is 0.0.0.0 - locally generated packet that is
	 * probably not supposed to be masqueraded.
	 */
	if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
		return NF_ACCEPT;

	/* Pick the source address the routing code would use towards the
	 * next hop on the output device.
	 */
	rt = skb_rtable(skb);
	nh = rt_nexthop(rt, ip_hdr(skb)->daddr);
	newsrc = inet_select_addr(out, nh, RT_SCOPE_UNIVERSE);
	if (!newsrc) {
		pr_info("%s ate my IP address\n", out->name);
		return NF_DROP;
	}

	/* Remember the interface so masq_device_event()/masq_inet_event()
	 * can flush this conntrack if the device or address disappears.
	 * A NULL return just means we lose that cleanup, not NAT itself.
	 */
	nat = nf_ct_nat_ext_add(ct);
	if (nat)
		nat->masq_index = out->ifindex;

	/* Transfer from original range. */
	memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
	memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
	newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS;
	newrange.min_addr.ip = newsrc;
	newrange.max_addr.ip = newsrc;
	newrange.min_proto = range->min_proto;
	newrange.max_proto = range->max_proto;

	/* Hand modified range to generic setup. */
	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4);

/* Iterator callback: true if conntrack @i was masqueraded via the interface
 * whose ifindex is encoded in @ifindex (cast from the void * cookie).
 */
static int device_cmp(struct nf_conn *i, void *ifindex)
{
	const struct nf_conn_nat *nat = nfct_nat(i);

	if (!nat)
		return 0;
	return nat->masq_index == (int)(long)ifindex;
}

/* Netdevice notifier: on NETDEV_DOWN, drop all conntracks that were
 * masqueraded through the device, since their source mapping is now stale.
 */
static int masq_device_event(struct notifier_block *this,
			     unsigned long event,
			     void *ptr)
{
	const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);

	if (event == NETDEV_DOWN) {
		/* Device was downed.  Search entire table for
		 * conntracks which were associated with that device,
		 * and forget them.
		 */

		nf_ct_iterate_cleanup_net(net, device_cmp,
					  (void *)(long)dev->ifindex, 0, 0);
	}

	return NOTIFY_DONE;
}

/* Iterator callback: true if conntrack @ct was masqueraded via the device
 * owning @ptr (an in_ifaddr) to exactly the address being removed.
 * Compares against the reply-direction destination, which holds the
 * masqueraded source address.
 */
static int inet_cmp(struct nf_conn *ct, void *ptr)
{
	struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
	struct net_device *dev = ifa->ifa_dev->dev;
	struct nf_conntrack_tuple *tuple;

	if (!device_cmp(ct, (void *)(long)dev->ifindex))
		return 0;

	tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;

	return ifa->ifa_address == tuple->dst.u3.ip;
}

/* inetaddr notifier: flush conntracks that masqueraded to an IPv4 address
 * that is being removed from an interface.
 */
static int masq_inet_event(struct notifier_block *this,
			   unsigned long event,
			   void *ptr)
{
	struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev;
	struct net *net = dev_net(idev->dev);

	/* The masq_dev_notifier will catch the case of the device going
	 * down.  So if the inetdev is dead and being destroyed we have
	 * no work to do.  Otherwise this is an individual address removal
	 * and we have to perform the flush.
	 */
	if (idev->dead)
		return NOTIFY_DONE;

	if (event == NETDEV_DOWN)
		nf_ct_iterate_cleanup_net(net, inet_cmp, ptr, 0, 0);

	return NOTIFY_DONE;
}

static struct notifier_block masq_dev_notifier = {
	.notifier_call	= masq_device_event,
};

static struct notifier_block masq_inet_notifier = {
	.notifier_call	= masq_inet_event,
};

/* Refcounted registration of the IPv4 device/address notifiers.  The
 * notifiers are shared by all users; only the first caller actually
 * registers them.  Returns 0 or a negative errno.
 */
int nf_nat_masquerade_ipv4_register_notifier(void)
{
	int ret = 0;

	mutex_lock(&masq_mutex);
	if (WARN_ON_ONCE(masq_refcnt4 == UINT_MAX)) {
		ret = -EOVERFLOW;
		goto out_unlock;
	}

	/* check if the notifier was already set */
	if (++masq_refcnt4 > 1)
		goto out_unlock;

	/* Register for device down reports */
	ret = register_netdevice_notifier(&masq_dev_notifier);
	if (ret)
		goto err_dec;
	/* Register IP address change reports */
	ret = register_inetaddr_notifier(&masq_inet_notifier);
	if (ret)
		goto err_unregister;

	mutex_unlock(&masq_mutex);
	return ret;

err_unregister:
	unregister_netdevice_notifier(&masq_dev_notifier);
err_dec:
	masq_refcnt4--;
out_unlock:
	mutex_unlock(&masq_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_register_notifier);

/* Drop one reference; the notifiers are unregistered when the last user
 * goes away (refcount hits zero).
 */
void nf_nat_masquerade_ipv4_unregister_notifier(void)
{
	mutex_lock(&masq_mutex);
	/* check if the notifier still has clients */
	if (--masq_refcnt4 > 0)
		goto out_unlock;

	unregister_netdevice_notifier(&masq_dev_notifier);
	unregister_inetaddr_notifier(&masq_inet_notifier);
out_unlock:
	mutex_unlock(&masq_mutex);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_unregister_notifier);

#if IS_ENABLED(CONFIG_IPV6)
/* Number of in-flight cleanup work items queued by masq_inet6_event();
 * capped there to bound the work queued from the atomic notifier.
 */
static atomic_t v6_worker_count __read_mostly;

/* Select a source address on @dev for reaching @daddr.  When IPv6 is built
 * as a module, go through the nf_ipv6_ops indirection (presumably to avoid
 * a hard symbol dependency on the ipv6 module — NOTE(review): confirm);
 * otherwise call ipv6_dev_get_saddr() directly.  Returns 0 or a negative
 * errno (-EHOSTUNREACH if the ipv6 ops are unavailable).
 */
static int
nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
		       const struct in6_addr *daddr, unsigned int srcprefs,
		       struct in6_addr *saddr)
{
#ifdef CONFIG_IPV6_MODULE
	const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();

	if (!v6_ops)
		return -EHOSTUNREACH;

	return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr);
#else
	return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr);
#endif
}

/* IPv6 counterpart of nf_nat_masquerade_ipv4(): rewrite the connection's
 * source to an address selected on @out and record out->ifindex for later
 * cleanup.  Returns an NF_* verdict.
 */
unsigned int
nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
		       const struct net_device *out)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn_nat *nat;
	struct in6_addr src;
	struct nf_conn *ct;
	struct nf_nat_range2 newrange;

	ct = nf_ct_get(skb, &ctinfo);
	/* NAT setup must happen on the first packet of a connection. */
	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
			 ctinfo == IP_CT_RELATED_REPLY)));

	if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out,
				   &ipv6_hdr(skb)->daddr, 0, &src) < 0)
		return NF_DROP;

	/* Remember the interface for flush-on-removal, as in the v4 path. */
	nat = nf_ct_nat_ext_add(ct);
	if (nat)
		nat->masq_index = out->ifindex;

	newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS;
	newrange.min_addr.in6 = src;
	newrange.max_addr.in6 = src;
	newrange.min_proto = range->min_proto;
	newrange.max_proto = range->max_proto;

	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6);

/* Deferred-cleanup context: which address on which interface (in which
 * netns) was removed.  Allocated in masq_inet6_event(), freed by
 * iterate_cleanup_work().
 */
struct masq_dev_work {
	struct work_struct work;
	struct net *net;
	struct in6_addr addr;
	int ifindex;
};

/* Iterator callback: true if conntrack @ct was masqueraded via the
 * interface and to the exact IPv6 address captured in the work item.
 */
static int inet6_cmp(struct nf_conn *ct, void *work)
{
	struct masq_dev_work *w = (struct masq_dev_work *)work;
	struct nf_conntrack_tuple *tuple;

	if (!device_cmp(ct, (void *)(long)w->ifindex))
		return 0;

	tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;

	return ipv6_addr_equal(&w->addr, &tuple->dst.u3.in6);
}

/* Workqueue handler: perform the conntrack flush that masq_inet6_event()
 * could not do in atomic context, then release every reference the event
 * handler took (netns, worker slot, module).
 */
static void iterate_cleanup_work(struct work_struct *work)
{
	struct masq_dev_work *w;

	w = container_of(work, struct masq_dev_work, work);

	nf_ct_iterate_cleanup_net(w->net, inet6_cmp, (void *)w, 0, 0);

	put_net(w->net);
	kfree(w);
	atomic_dec(&v6_worker_count);
	module_put(THIS_MODULE);
}

/* atomic notifier; can't call nf_ct_iterate_cleanup_net (it can sleep).
 *
 * Defer it to the system workqueue.
 *
 * As we can have 'a lot' of inet_events (depending on amount of ipv6
 * addresses being deleted), we also need to limit work item queue.
 */
static int masq_inet6_event(struct notifier_block *this,
			    unsigned long event, void *ptr)
{
	struct inet6_ifaddr *ifa = ptr;
	const struct net_device *dev;
	struct masq_dev_work *w;
	struct net *net;

	/* Cap pending work items at 16 to bound memory/queue pressure;
	 * dropping an event here just means a stale conntrack survives
	 * until it times out or the device goes down.
	 */
	if (event != NETDEV_DOWN || atomic_read(&v6_worker_count) >= 16)
		return NOTIFY_DONE;

	/* Pin the netns so it cannot vanish before the work runs; skip if
	 * it is already being torn down.
	 */
	dev = ifa->idev->dev;
	net = maybe_get_net(dev_net(dev));
	if (!net)
		return NOTIFY_DONE;

	/* Keep this module loaded until the deferred work completes. */
	if (!try_module_get(THIS_MODULE))
		goto err_module;

	/* GFP_ATOMIC: we are in an atomic notifier and may not sleep. */
	w = kmalloc(sizeof(*w), GFP_ATOMIC);
	if (w) {
		atomic_inc(&v6_worker_count);

		INIT_WORK(&w->work, iterate_cleanup_work);
		w->ifindex = dev->ifindex;
		w->net = net;
		w->addr = ifa->addr;
		schedule_work(&w->work);

		return NOTIFY_DONE;
	}

	/* Allocation failed: unwind the module and netns references. */
	module_put(THIS_MODULE);
err_module:
	put_net(net);
	return NOTIFY_DONE;
}

static struct notifier_block masq_inet6_notifier = {
	.notifier_call	= masq_inet6_event,
};

/* Refcounted registration of the IPv6 address notifier; mirrors the IPv4
 * variant above.  Returns 0 or a negative errno.
 */
int nf_nat_masquerade_ipv6_register_notifier(void)
{
	int ret = 0;

	mutex_lock(&masq_mutex);
	if (WARN_ON_ONCE(masq_refcnt6 == UINT_MAX)) {
		ret = -EOVERFLOW;
		goto out_unlock;
	}

	/* check if the notifier is already set */
	if (++masq_refcnt6 > 1)
		goto out_unlock;

	ret = register_inet6addr_notifier(&masq_inet6_notifier);
	if (ret)
		goto err_dec;

	mutex_unlock(&masq_mutex);
	return ret;
err_dec:
	masq_refcnt6--;
out_unlock:
	mutex_unlock(&masq_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_register_notifier);

/* Drop one reference; unregister the notifier when the last user is gone. */
void
nf_nat_masquerade_ipv6_unregister_notifier(void)
{
	mutex_lock(&masq_mutex);
	/* check if the notifier still has clients */
	if (--masq_refcnt6 > 0)
		goto out_unlock;

	unregister_inet6addr_notifier(&masq_inet6_notifier);
out_unlock:
	mutex_unlock(&masq_mutex);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_unregister_notifier);
#endif