1 // SPDX-License-Identifier: GPL-2.0 2 3 #include <linux/types.h> 4 #include <linux/atomic.h> 5 #include <linux/inetdevice.h> 6 #include <linux/netfilter.h> 7 #include <linux/netfilter_ipv4.h> 8 #include <linux/netfilter_ipv6.h> 9 10 #include <net/netfilter/nf_nat_masquerade.h> 11 12 struct masq_dev_work { 13 struct work_struct work; 14 struct net *net; 15 netns_tracker ns_tracker; 16 union nf_inet_addr addr; 17 int ifindex; 18 int (*iter)(struct nf_conn *i, void *data); 19 }; 20 21 #define MAX_MASQ_WORKER_COUNT 16 22 23 static DEFINE_MUTEX(masq_mutex); 24 static unsigned int masq_refcnt __read_mostly; 25 static atomic_t masq_worker_count __read_mostly; 26 27 unsigned int 28 nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, 29 const struct nf_nat_range2 *range, 30 const struct net_device *out) 31 { 32 struct nf_conn *ct; 33 struct nf_conn_nat *nat; 34 enum ip_conntrack_info ctinfo; 35 struct nf_nat_range2 newrange; 36 const struct rtable *rt; 37 __be32 newsrc, nh; 38 39 WARN_ON(hooknum != NF_INET_POST_ROUTING); 40 41 ct = nf_ct_get(skb, &ctinfo); 42 43 WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || 44 ctinfo == IP_CT_RELATED_REPLY))); 45 46 /* Source address is 0.0.0.0 - locally generated packet that is 47 * probably not supposed to be masqueraded. 48 */ 49 if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0) 50 return NF_ACCEPT; 51 52 rt = skb_rtable(skb); 53 nh = rt_nexthop(rt, ip_hdr(skb)->daddr); 54 newsrc = inet_select_addr(out, nh, RT_SCOPE_UNIVERSE); 55 if (!newsrc) { 56 pr_info("%s ate my IP address\n", out->name); 57 return NF_DROP; 58 } 59 60 nat = nf_ct_nat_ext_add(ct); 61 if (nat) 62 nat->masq_index = out->ifindex; 63 64 /* Transfer from original range. */ 65 memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); 66 memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); 67 newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; 68 newrange.min_addr.ip = newsrc; 69 newrange.max_addr.ip = newsrc; 70 newrange.min_proto = range->min_proto; 71 newrange.max_proto = range->max_proto; 72 73 /* Hand modified range to generic setup. */ 74 return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC); 75 } 76 EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4); 77 78 static void iterate_cleanup_work(struct work_struct *work) 79 { 80 struct masq_dev_work *w; 81 82 w = container_of(work, struct masq_dev_work, work); 83 84 nf_ct_iterate_cleanup_net(w->net, w->iter, (void *)w, 0, 0); 85 86 put_net_track(w->net, &w->ns_tracker); 87 kfree(w); 88 atomic_dec(&masq_worker_count); 89 module_put(THIS_MODULE); 90 } 91 92 /* Iterate conntrack table in the background and remove conntrack entries 93 * that use the device/address being removed. 94 * 95 * In case too many work items have been queued already or memory allocation 96 * fails iteration is skipped, conntrack entries will time out eventually. 97 */ 98 static void nf_nat_masq_schedule(struct net *net, union nf_inet_addr *addr, 99 int ifindex, 100 int (*iter)(struct nf_conn *i, void *data), 101 gfp_t gfp_flags) 102 { 103 struct masq_dev_work *w; 104 105 if (atomic_read(&masq_worker_count) > MAX_MASQ_WORKER_COUNT) 106 return; 107 108 net = maybe_get_net(net); 109 if (!net) 110 return; 111 112 if (!try_module_get(THIS_MODULE)) 113 goto err_module; 114 115 w = kzalloc(sizeof(*w), gfp_flags); 116 if (w) { 117 /* We can overshoot MAX_MASQ_WORKER_COUNT, no big deal */ 118 atomic_inc(&masq_worker_count); 119 120 INIT_WORK(&w->work, iterate_cleanup_work); 121 w->ifindex = ifindex; 122 w->net = net; 123 netns_tracker_alloc(net, &w->ns_tracker, gfp_flags); 124 w->iter = iter; 125 if (addr) 126 w->addr = *addr; 127 schedule_work(&w->work); 128 return; 129 } 130 131 module_put(THIS_MODULE); 132 err_module: 133 put_net(net); 134 } 135 136 static int device_cmp(struct nf_conn *i, void *arg) 137 { 138 const struct nf_conn_nat *nat = nfct_nat(i); 139 const struct masq_dev_work *w = arg; 140 141 if (!nat) 142 return 0; 143 return nat->masq_index == w->ifindex; 144 } 145 146 static int masq_device_event(struct notifier_block *this, 147 unsigned long event, 148 void *ptr) 149 { 150 const struct net_device *dev = netdev_notifier_info_to_dev(ptr); 151 struct net *net = dev_net(dev); 152 153 if (event == NETDEV_DOWN) { 154 /* Device was downed. Search entire table for 155 * conntracks which were associated with that device, 156 * and forget them. 157 */ 158 159 nf_nat_masq_schedule(net, NULL, dev->ifindex, 160 device_cmp, GFP_KERNEL); 161 } 162 163 return NOTIFY_DONE; 164 } 165 166 static int inet_cmp(struct nf_conn *ct, void *ptr) 167 { 168 struct nf_conntrack_tuple *tuple; 169 struct masq_dev_work *w = ptr; 170 171 if (!device_cmp(ct, ptr)) 172 return 0; 173 174 tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; 175 176 return nf_inet_addr_cmp(&w->addr, &tuple->dst.u3); 177 } 178 179 static int masq_inet_event(struct notifier_block *this, 180 unsigned long event, 181 void *ptr) 182 { 183 const struct in_ifaddr *ifa = ptr; 184 const struct in_device *idev; 185 const struct net_device *dev; 186 union nf_inet_addr addr; 187 188 if (event != NETDEV_DOWN) 189 return NOTIFY_DONE; 190 191 /* The masq_dev_notifier will catch the case of the device going 192 * down. So if the inetdev is dead and being destroyed we have 193 * no work to do. Otherwise this is an individual address removal 194 * and we have to perform the flush. 195 */ 196 idev = ifa->ifa_dev; 197 if (idev->dead) 198 return NOTIFY_DONE; 199 200 memset(&addr, 0, sizeof(addr)); 201 202 addr.ip = ifa->ifa_address; 203 204 dev = idev->dev; 205 nf_nat_masq_schedule(dev_net(idev->dev), &addr, dev->ifindex, 206 inet_cmp, GFP_KERNEL); 207 208 return NOTIFY_DONE; 209 } 210 211 static struct notifier_block masq_dev_notifier = { 212 .notifier_call = masq_device_event, 213 }; 214 215 static struct notifier_block masq_inet_notifier = { 216 .notifier_call = masq_inet_event, 217 }; 218 219 #if IS_ENABLED(CONFIG_IPV6) 220 static int 221 nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev, 222 const struct in6_addr *daddr, unsigned int srcprefs, 223 struct in6_addr *saddr) 224 { 225 #ifdef CONFIG_IPV6_MODULE 226 const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); 227 228 if (!v6_ops) 229 return -EHOSTUNREACH; 230 231 return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr); 232 #else 233 return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr); 234 #endif 235 } 236 237 unsigned int 238 nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, 239 const struct net_device *out) 240 { 241 enum ip_conntrack_info ctinfo; 242 struct nf_conn_nat *nat; 243 struct in6_addr src; 244 struct nf_conn *ct; 245 struct nf_nat_range2 newrange; 246 247 ct = nf_ct_get(skb, &ctinfo); 248 WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || 249 ctinfo == IP_CT_RELATED_REPLY))); 250 251 if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out, 252 &ipv6_hdr(skb)->daddr, 0, &src) < 0) 253 return NF_DROP; 254 255 nat = nf_ct_nat_ext_add(ct); 256 if (nat) 257 nat->masq_index = out->ifindex; 258 259 newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; 260 newrange.min_addr.in6 = src; 261 newrange.max_addr.in6 = src; 262 newrange.min_proto = range->min_proto; 263 newrange.max_proto = range->max_proto; 264 265 return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC); 266 } 267 EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6); 268 269 /* atomic notifier; can't call nf_ct_iterate_cleanup_net (it can sleep). 270 * 271 * Defer it to the system workqueue. 272 * 273 * As we can have 'a lot' of inet_events (depending on amount of ipv6 274 * addresses being deleted), we also need to limit work item queue. 275 */ 276 static int masq_inet6_event(struct notifier_block *this, 277 unsigned long event, void *ptr) 278 { 279 struct inet6_ifaddr *ifa = ptr; 280 const struct net_device *dev; 281 union nf_inet_addr addr; 282 283 if (event != NETDEV_DOWN) 284 return NOTIFY_DONE; 285 286 dev = ifa->idev->dev; 287 288 memset(&addr, 0, sizeof(addr)); 289 290 addr.in6 = ifa->addr; 291 292 nf_nat_masq_schedule(dev_net(dev), &addr, dev->ifindex, inet_cmp, 293 GFP_ATOMIC); 294 return NOTIFY_DONE; 295 } 296 297 static struct notifier_block masq_inet6_notifier = { 298 .notifier_call = masq_inet6_event, 299 }; 300 301 static int nf_nat_masquerade_ipv6_register_notifier(void) 302 { 303 return register_inet6addr_notifier(&masq_inet6_notifier); 304 } 305 #else 306 static inline int nf_nat_masquerade_ipv6_register_notifier(void) { return 0; } 307 #endif 308 309 int nf_nat_masquerade_inet_register_notifiers(void) 310 { 311 int ret = 0; 312 313 mutex_lock(&masq_mutex); 314 if (WARN_ON_ONCE(masq_refcnt == UINT_MAX)) { 315 ret = -EOVERFLOW; 316 goto out_unlock; 317 } 318 319 /* check if the notifier was already set */ 320 if (++masq_refcnt > 1) 321 goto out_unlock; 322 323 /* Register for device down reports */ 324 ret = register_netdevice_notifier(&masq_dev_notifier); 325 if (ret) 326 goto err_dec; 327 /* Register IP address change reports */ 328 ret = register_inetaddr_notifier(&masq_inet_notifier); 329 if (ret) 330 goto err_unregister; 331 332 ret = nf_nat_masquerade_ipv6_register_notifier(); 333 if (ret) 334 goto err_unreg_inet; 335 336 mutex_unlock(&masq_mutex); 337 return ret; 338 err_unreg_inet: 339 unregister_inetaddr_notifier(&masq_inet_notifier); 340 err_unregister: 341 unregister_netdevice_notifier(&masq_dev_notifier); 342 err_dec: 343 masq_refcnt--; 344 out_unlock: 345 mutex_unlock(&masq_mutex); 346 return ret; 347 } 348 EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_register_notifiers); 349 350 void nf_nat_masquerade_inet_unregister_notifiers(void) 351 { 352 mutex_lock(&masq_mutex); 353 /* check if the notifiers still have clients */ 354 if (--masq_refcnt > 0) 355 goto out_unlock; 356 357 unregister_netdevice_notifier(&masq_dev_notifier); 358 unregister_inetaddr_notifier(&masq_inet_notifier); 359 #if IS_ENABLED(CONFIG_IPV6) 360 unregister_inet6addr_notifier(&masq_inet6_notifier); 361 #endif 362 out_unlock: 363 mutex_unlock(&masq_mutex); 364 } 365 EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_unregister_notifiers); 366