// SPDX-License-Identifier: GPL-2.0

#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/inetdevice.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>

#include <net/netfilter/nf_nat_masquerade.h>

/* Serializes notifier (un)registration and guards masq_refcnt below. */
static DEFINE_MUTEX(masq_mutex);
/* Number of users that requested the masquerade notifiers; the notifier
 * chains are registered on the 0 -> 1 transition and torn down on 1 -> 0.
 */
static unsigned int masq_refcnt __read_mostly;

/* Masquerade an IPv4 packet leaving @out at POST_ROUTING: select the
 * address @out would use to reach the packet's next hop, remember the
 * egress ifindex in the conntrack NAT extension (so the mapping can be
 * flushed when the device goes away), and hand a single-address SNAT
 * range to nf_nat_setup_info().
 */
unsigned int
nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
		       const struct nf_nat_range2 *range,
		       const struct net_device *out)
{
	struct nf_conn *ct;
	struct nf_conn_nat *nat;
	enum ip_conntrack_info ctinfo;
	struct nf_nat_range2 newrange;
	const struct rtable *rt;
	__be32 newsrc, nh;

	WARN_ON(hooknum != NF_INET_POST_ROUTING);

	ct = nf_ct_get(skb, &ctinfo);

	/* NAT setup only makes sense for a fresh (or related) connection. */
	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
			 ctinfo == IP_CT_RELATED_REPLY)));

	/* Source address is 0.0.0.0 - locally generated packet that is
	 * probably not supposed to be masqueraded.
	 */
	if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
		return NF_ACCEPT;

	/* Pick the primary address of @out that is usable toward the
	 * route's next hop.
	 */
	rt = skb_rtable(skb);
	nh = rt_nexthop(rt, ip_hdr(skb)->daddr);
	newsrc = inet_select_addr(out, nh, RT_SCOPE_UNIVERSE);
	if (!newsrc) {
		pr_info("%s ate my IP address\n", out->name);
		return NF_DROP;
	}

	/* Record the egress ifindex so masq_device_event()/masq_inet_event()
	 * can later find and drop conntracks bound to this device.
	 */
	nat = nf_ct_nat_ext_add(ct);
	if (nat)
		nat->masq_index = out->ifindex;

	/* Transfer from original range. */
	memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
	memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
	newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS;
	newrange.min_addr.ip = newsrc;
	newrange.max_addr.ip = newsrc;
	newrange.min_proto = range->min_proto;
	newrange.max_proto = range->max_proto;

	/* Hand modified range to generic setup. */
	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4);

/* Iterator callback: true if conntrack @i was masqueraded via the device
 * whose ifindex is passed (cast) in @ifindex.
 */
static int device_cmp(struct nf_conn *i, void *ifindex)
{
	const struct nf_conn_nat *nat = nfct_nat(i);

	/* No NAT extension means this entry was never masqueraded. */
	if (!nat)
		return 0;
	return nat->masq_index == (int)(long)ifindex;
}

static int masq_device_event(struct notifier_block *this,
			     unsigned long event,
			     void *ptr)
{
	const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);

	if (event == NETDEV_DOWN) {
		/* Device was downed. Search entire table for
		 * conntracks which were associated with that device,
		 * and forget them.
		 */

		nf_ct_iterate_cleanup_net(net, device_cmp,
					  (void *)(long)dev->ifindex, 0, 0);
	}

	return NOTIFY_DONE;
}

/* Iterator callback: true if @ct was masqueraded via the device owning
 * the address in @ptr AND its reply destination equals that address,
 * i.e. the mapping uses the IPv4 address being removed.
 */
static int inet_cmp(struct nf_conn *ct, void *ptr)
{
	struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
	struct net_device *dev = ifa->ifa_dev->dev;
	struct nf_conntrack_tuple *tuple;

	if (!device_cmp(ct, (void *)(long)dev->ifindex))
		return 0;

	tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;

	return ifa->ifa_address == tuple->dst.u3.ip;
}

static int masq_inet_event(struct notifier_block *this,
			   unsigned long event,
			   void *ptr)
{
	struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev;
	struct net *net = dev_net(idev->dev);

	/* The masq_dev_notifier will catch the case of the device going
	 * down. So if the inetdev is dead and being destroyed we have
	 * no work to do. Otherwise this is an individual address removal
	 * and we have to perform the flush.
	 */
	if (idev->dead)
		return NOTIFY_DONE;

	if (event == NETDEV_DOWN)
		nf_ct_iterate_cleanup_net(net, inet_cmp, ptr, 0, 0);

	return NOTIFY_DONE;
}

static struct notifier_block masq_dev_notifier = {
	.notifier_call = masq_device_event,
};

static struct notifier_block masq_inet_notifier = {
	.notifier_call = masq_inet_event,
};

#if IS_ENABLED(CONFIG_IPV6)
/* Number of in-flight deferred IPv6 cleanup work items (bounded below). */
static atomic_t v6_worker_count __read_mostly;

/* Wrapper around ipv6_dev_get_saddr(): when IPv6 is built as a module we
 * must go through the nf_ipv6_ops indirection instead of calling it
 * directly, so this file can live in the (built-in) NAT core.
 */
static int
nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
		       const struct in6_addr *daddr, unsigned int srcprefs,
		       struct in6_addr *saddr)
{
#ifdef CONFIG_IPV6_MODULE
	const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();

	/* IPv6 module not loaded: no usable source address. */
	if (!v6_ops)
		return -EHOSTUNREACH;

	return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr);
#else
	return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr);
#endif
}

/* IPv6 counterpart of nf_nat_masquerade_ipv4(): pick a source address on
 * @out for the packet's destination, stash the egress ifindex in the NAT
 * extension, and install a single-address SNAT range.
 */
unsigned int
nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
		       const struct net_device *out)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn_nat *nat;
	struct in6_addr src;
	struct nf_conn *ct;
	struct nf_nat_range2 newrange;

	ct = nf_ct_get(skb, &ctinfo);
	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
			 ctinfo == IP_CT_RELATED_REPLY)));

	if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out,
				   &ipv6_hdr(skb)->daddr, 0, &src) < 0)
		return NF_DROP;

	nat = nf_ct_nat_ext_add(ct);
	if (nat)
		nat->masq_index = out->ifindex;

	newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS;
	newrange.min_addr.in6 = src;
	newrange.max_addr.in6 = src;
	newrange.min_proto = range->min_proto;
	newrange.max_proto = range->max_proto;

	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6);

/* Deferred-cleanup context for an IPv6 address removal: the conntrack
 * table walk cannot run from the (atomic) inet6addr notifier, so it is
 * captured here and run from the system workqueue.
 */
struct masq_dev_work {
	struct work_struct work;
	struct net *net;	/* netns to scan; holds a reference */
	struct in6_addr addr;	/* address that was removed */
	int ifindex;		/* device the address lived on */
};

/* Iterator callback: IPv6 twin of inet_cmp(), matching against the
 * snapshot stored in the work item.
 */
static int inet6_cmp(struct nf_conn *ct, void *work)
{
	struct masq_dev_work *w = (struct masq_dev_work *)work;
	struct nf_conntrack_tuple *tuple;

	if (!device_cmp(ct, (void *)(long)w->ifindex))
		return 0;

	tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;

	return ipv6_addr_equal(&w->addr, &tuple->dst.u3.in6);
}

/* Workqueue handler: perform the conntrack flush prepared by
 * masq_inet6_event(), then release every reference taken there
 * (netns ref, work-count slot, module ref).
 */
static void iterate_cleanup_work(struct work_struct *work)
{
	struct masq_dev_work *w;

	w = container_of(work, struct masq_dev_work, work);

	nf_ct_iterate_cleanup_net(w->net, inet6_cmp, (void *)w, 0, 0);

	put_net(w->net);
	kfree(w);
	atomic_dec(&v6_worker_count);
	module_put(THIS_MODULE);
}

/* atomic notifier; can't call nf_ct_iterate_cleanup_net (it can sleep).
 *
 * Defer it to the system workqueue.
 *
 * As we can have 'a lot' of inet_events (depending on amount of ipv6
 * addresses being deleted), we also need to limit work item queue.
 */
static int masq_inet6_event(struct notifier_block *this,
			    unsigned long event, void *ptr)
{
	struct inet6_ifaddr *ifa = ptr;
	const struct net_device *dev;
	struct masq_dev_work *w;
	struct net *net;

	/* Cap outstanding work items at 16; beyond that, skip the flush
	 * (stale entries will still be caught by the device-down path).
	 */
	if (event != NETDEV_DOWN || atomic_read(&v6_worker_count) >= 16)
		return NOTIFY_DONE;

	dev = ifa->idev->dev;
	/* maybe_get_net() fails if the netns is already being torn down;
	 * nothing to clean up in that case.
	 */
	net = maybe_get_net(dev_net(dev));
	if (!net)
		return NOTIFY_DONE;

	/* Pin the module so the work item can't outlive this code. */
	if (!try_module_get(THIS_MODULE))
		goto err_module;

	/* GFP_ATOMIC: we are in atomic notifier context. */
	w = kmalloc(sizeof(*w), GFP_ATOMIC);
	if (w) {
		atomic_inc(&v6_worker_count);

		INIT_WORK(&w->work, iterate_cleanup_work);
		w->ifindex = dev->ifindex;
		w->net = net;
		w->addr = ifa->addr;
		schedule_work(&w->work);

		return NOTIFY_DONE;
	}

	/* Allocation failed: unwind the references taken above. */
	module_put(THIS_MODULE);
err_module:
	put_net(net);
	return NOTIFY_DONE;
}

static struct notifier_block masq_inet6_notifier = {
	.notifier_call = masq_inet6_event,
};

static int nf_nat_masquerade_ipv6_register_notifier(void)
{
	return register_inet6addr_notifier(&masq_inet6_notifier);
}
#else
/* No IPv6 support: registration is a successful no-op. */
static inline int nf_nat_masquerade_ipv6_register_notifier(void) { return 0; }
#endif

/* Refcounted registration of all masquerade notifiers (netdevice,
 * inetaddr, and - if enabled - inet6addr). Only the first caller
 * actually registers; later callers just bump the count.
 * Returns 0 on success or a negative errno, with partial registrations
 * rolled back.
 */
int nf_nat_masquerade_inet_register_notifiers(void)
{
	int ret = 0;

	mutex_lock(&masq_mutex);
	if (WARN_ON_ONCE(masq_refcnt == UINT_MAX)) {
		ret = -EOVERFLOW;
		goto out_unlock;
	}

	/* check if the notifier was already set */
	if (++masq_refcnt > 1)
		goto out_unlock;

	/* Register for device down reports */
	ret = register_netdevice_notifier(&masq_dev_notifier);
	if (ret)
		goto err_dec;
	/* Register IP address change reports */
	ret = register_inetaddr_notifier(&masq_inet_notifier);
	if (ret)
		goto err_unregister;

	ret = nf_nat_masquerade_ipv6_register_notifier();
	if (ret)
		goto err_unreg_inet;

	mutex_unlock(&masq_mutex);
	return ret;
err_unreg_inet:
	unregister_inetaddr_notifier(&masq_inet_notifier);
err_unregister:
	unregister_netdevice_notifier(&masq_dev_notifier);
err_dec:
	masq_refcnt--;
out_unlock:
	mutex_unlock(&masq_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_register_notifiers);

/* Drop one registration reference; the last caller unregisters all
 * notifier chains set up by nf_nat_masquerade_inet_register_notifiers().
 */
void nf_nat_masquerade_inet_unregister_notifiers(void)
{
	mutex_lock(&masq_mutex);
	/* check if the notifiers still have clients */
	if (--masq_refcnt > 0)
		goto out_unlock;

	unregister_netdevice_notifier(&masq_dev_notifier);
	unregister_inetaddr_notifier(&masq_inet_notifier);
#if IS_ENABLED(CONFIG_IPV6)
	unregister_inet6addr_notifier(&masq_inet6_notifier);
#endif
out_unlock:
	mutex_unlock(&masq_mutex);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_unregister_notifiers);