xref: /openbmc/linux/net/netfilter/nf_nat_proto.c (revision 0a30ba50)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* (C) 1999-2001 Paul `Rusty' Russell
3  * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
4  */
5 
6 #include <linux/types.h>
7 #include <linux/export.h>
8 #include <linux/init.h>
9 #include <linux/udp.h>
10 #include <linux/tcp.h>
11 #include <linux/icmp.h>
12 #include <linux/icmpv6.h>
13 
14 #include <linux/dccp.h>
15 #include <linux/sctp.h>
16 #include <net/sctp/checksum.h>
17 
18 #include <linux/netfilter.h>
19 #include <net/netfilter/nf_nat.h>
20 
21 #include <linux/ipv6.h>
22 #include <linux/netfilter_ipv6.h>
23 #include <net/checksum.h>
24 #include <net/ip6_checksum.h>
25 #include <net/ip6_route.h>
26 #include <net/xfrm.h>
27 #include <net/ipv6.h>
28 
29 #include <net/netfilter/nf_conntrack_core.h>
30 #include <net/netfilter/nf_conntrack.h>
31 #include <linux/netfilter/nfnetlink_conntrack.h>
32 
/* Forward declaration: the per-protocol manip helpers below call
 * nf_csum_update() ahead of its definition later in this file.
 */
static void nf_csum_update(struct sk_buff *skb,
			   unsigned int iphdroff, __sum16 *check,
			   const struct nf_conntrack_tuple *t,
			   enum nf_nat_manip_type maniptype);
37 
38 static void
39 __udp_manip_pkt(struct sk_buff *skb,
40 	        unsigned int iphdroff, struct udphdr *hdr,
41 	        const struct nf_conntrack_tuple *tuple,
42 	        enum nf_nat_manip_type maniptype, bool do_csum)
43 {
44 	__be16 *portptr, newport;
45 
46 	if (maniptype == NF_NAT_MANIP_SRC) {
47 		/* Get rid of src port */
48 		newport = tuple->src.u.udp.port;
49 		portptr = &hdr->source;
50 	} else {
51 		/* Get rid of dst port */
52 		newport = tuple->dst.u.udp.port;
53 		portptr = &hdr->dest;
54 	}
55 	if (do_csum) {
56 		nf_csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
57 		inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
58 					 false);
59 		if (!hdr->check)
60 			hdr->check = CSUM_MANGLED_0;
61 	}
62 	*portptr = newport;
63 }
64 
65 static bool udp_manip_pkt(struct sk_buff *skb,
66 			  unsigned int iphdroff, unsigned int hdroff,
67 			  const struct nf_conntrack_tuple *tuple,
68 			  enum nf_nat_manip_type maniptype)
69 {
70 	struct udphdr *hdr;
71 	bool do_csum;
72 
73 	if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
74 		return false;
75 
76 	hdr = (struct udphdr *)(skb->data + hdroff);
77 	do_csum = hdr->check || skb->ip_summed == CHECKSUM_PARTIAL;
78 
79 	__udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, do_csum);
80 	return true;
81 }
82 
83 static bool udplite_manip_pkt(struct sk_buff *skb,
84 			      unsigned int iphdroff, unsigned int hdroff,
85 			      const struct nf_conntrack_tuple *tuple,
86 			      enum nf_nat_manip_type maniptype)
87 {
88 #ifdef CONFIG_NF_CT_PROTO_UDPLITE
89 	struct udphdr *hdr;
90 
91 	if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
92 		return false;
93 
94 	hdr = (struct udphdr *)(skb->data + hdroff);
95 	__udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, true);
96 #endif
97 	return true;
98 }
99 
100 static bool
101 sctp_manip_pkt(struct sk_buff *skb,
102 	       unsigned int iphdroff, unsigned int hdroff,
103 	       const struct nf_conntrack_tuple *tuple,
104 	       enum nf_nat_manip_type maniptype)
105 {
106 #ifdef CONFIG_NF_CT_PROTO_SCTP
107 	struct sctphdr *hdr;
108 	int hdrsize = 8;
109 
110 	/* This could be an inner header returned in imcp packet; in such
111 	 * cases we cannot update the checksum field since it is outside
112 	 * of the 8 bytes of transport layer headers we are guaranteed.
113 	 */
114 	if (skb->len >= hdroff + sizeof(*hdr))
115 		hdrsize = sizeof(*hdr);
116 
117 	if (skb_ensure_writable(skb, hdroff + hdrsize))
118 		return false;
119 
120 	hdr = (struct sctphdr *)(skb->data + hdroff);
121 
122 	if (maniptype == NF_NAT_MANIP_SRC) {
123 		/* Get rid of src port */
124 		hdr->source = tuple->src.u.sctp.port;
125 	} else {
126 		/* Get rid of dst port */
127 		hdr->dest = tuple->dst.u.sctp.port;
128 	}
129 
130 	if (hdrsize < sizeof(*hdr))
131 		return true;
132 
133 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
134 		hdr->checksum = sctp_compute_cksum(skb, hdroff);
135 		skb->ip_summed = CHECKSUM_NONE;
136 	}
137 
138 #endif
139 	return true;
140 }
141 
142 static bool
143 tcp_manip_pkt(struct sk_buff *skb,
144 	      unsigned int iphdroff, unsigned int hdroff,
145 	      const struct nf_conntrack_tuple *tuple,
146 	      enum nf_nat_manip_type maniptype)
147 {
148 	struct tcphdr *hdr;
149 	__be16 *portptr, newport, oldport;
150 	int hdrsize = 8; /* TCP connection tracking guarantees this much */
151 
152 	/* this could be a inner header returned in icmp packet; in such
153 	   cases we cannot update the checksum field since it is outside of
154 	   the 8 bytes of transport layer headers we are guaranteed */
155 	if (skb->len >= hdroff + sizeof(struct tcphdr))
156 		hdrsize = sizeof(struct tcphdr);
157 
158 	if (skb_ensure_writable(skb, hdroff + hdrsize))
159 		return false;
160 
161 	hdr = (struct tcphdr *)(skb->data + hdroff);
162 
163 	if (maniptype == NF_NAT_MANIP_SRC) {
164 		/* Get rid of src port */
165 		newport = tuple->src.u.tcp.port;
166 		portptr = &hdr->source;
167 	} else {
168 		/* Get rid of dst port */
169 		newport = tuple->dst.u.tcp.port;
170 		portptr = &hdr->dest;
171 	}
172 
173 	oldport = *portptr;
174 	*portptr = newport;
175 
176 	if (hdrsize < sizeof(*hdr))
177 		return true;
178 
179 	nf_csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
180 	inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, false);
181 	return true;
182 }
183 
184 static bool
185 dccp_manip_pkt(struct sk_buff *skb,
186 	       unsigned int iphdroff, unsigned int hdroff,
187 	       const struct nf_conntrack_tuple *tuple,
188 	       enum nf_nat_manip_type maniptype)
189 {
190 #ifdef CONFIG_NF_CT_PROTO_DCCP
191 	struct dccp_hdr *hdr;
192 	__be16 *portptr, oldport, newport;
193 	int hdrsize = 8; /* DCCP connection tracking guarantees this much */
194 
195 	if (skb->len >= hdroff + sizeof(struct dccp_hdr))
196 		hdrsize = sizeof(struct dccp_hdr);
197 
198 	if (skb_ensure_writable(skb, hdroff + hdrsize))
199 		return false;
200 
201 	hdr = (struct dccp_hdr *)(skb->data + hdroff);
202 
203 	if (maniptype == NF_NAT_MANIP_SRC) {
204 		newport = tuple->src.u.dccp.port;
205 		portptr = &hdr->dccph_sport;
206 	} else {
207 		newport = tuple->dst.u.dccp.port;
208 		portptr = &hdr->dccph_dport;
209 	}
210 
211 	oldport = *portptr;
212 	*portptr = newport;
213 
214 	if (hdrsize < sizeof(*hdr))
215 		return true;
216 
217 	nf_csum_update(skb, iphdroff, &hdr->dccph_checksum, tuple, maniptype);
218 	inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport,
219 				 false);
220 #endif
221 	return true;
222 }
223 
224 static bool
225 icmp_manip_pkt(struct sk_buff *skb,
226 	       unsigned int iphdroff, unsigned int hdroff,
227 	       const struct nf_conntrack_tuple *tuple,
228 	       enum nf_nat_manip_type maniptype)
229 {
230 	struct icmphdr *hdr;
231 
232 	if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
233 		return false;
234 
235 	hdr = (struct icmphdr *)(skb->data + hdroff);
236 	inet_proto_csum_replace2(&hdr->checksum, skb,
237 				 hdr->un.echo.id, tuple->src.u.icmp.id, false);
238 	hdr->un.echo.id = tuple->src.u.icmp.id;
239 	return true;
240 }
241 
242 static bool
243 icmpv6_manip_pkt(struct sk_buff *skb,
244 		 unsigned int iphdroff, unsigned int hdroff,
245 		 const struct nf_conntrack_tuple *tuple,
246 		 enum nf_nat_manip_type maniptype)
247 {
248 	struct icmp6hdr *hdr;
249 
250 	if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
251 		return false;
252 
253 	hdr = (struct icmp6hdr *)(skb->data + hdroff);
254 	nf_csum_update(skb, iphdroff, &hdr->icmp6_cksum, tuple, maniptype);
255 	if (hdr->icmp6_type == ICMPV6_ECHO_REQUEST ||
256 	    hdr->icmp6_type == ICMPV6_ECHO_REPLY) {
257 		inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
258 					 hdr->icmp6_identifier,
259 					 tuple->src.u.icmp.id, false);
260 		hdr->icmp6_identifier = tuple->src.u.icmp.id;
261 	}
262 	return true;
263 }
264 
265 /* manipulate a GRE packet according to maniptype */
266 static bool
267 gre_manip_pkt(struct sk_buff *skb,
268 	      unsigned int iphdroff, unsigned int hdroff,
269 	      const struct nf_conntrack_tuple *tuple,
270 	      enum nf_nat_manip_type maniptype)
271 {
272 #if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE)
273 	const struct gre_base_hdr *greh;
274 	struct pptp_gre_header *pgreh;
275 
276 	/* pgreh includes two optional 32bit fields which are not required
277 	 * to be there.  That's where the magic '8' comes from */
278 	if (skb_ensure_writable(skb, hdroff + sizeof(*pgreh) - 8))
279 		return false;
280 
281 	greh = (void *)skb->data + hdroff;
282 	pgreh = (struct pptp_gre_header *)greh;
283 
284 	/* we only have destination manip of a packet, since 'source key'
285 	 * is not present in the packet itself */
286 	if (maniptype != NF_NAT_MANIP_DST)
287 		return true;
288 
289 	switch (greh->flags & GRE_VERSION) {
290 	case GRE_VERSION_0:
291 		/* We do not currently NAT any GREv0 packets.
292 		 * Try to behave like "nf_nat_proto_unknown" */
293 		break;
294 	case GRE_VERSION_1:
295 		pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key));
296 		pgreh->call_id = tuple->dst.u.gre.key;
297 		break;
298 	default:
299 		pr_debug("can't nat unknown GRE version\n");
300 		return false;
301 	}
302 #endif
303 	return true;
304 }
305 
306 static bool l4proto_manip_pkt(struct sk_buff *skb,
307 			      unsigned int iphdroff, unsigned int hdroff,
308 			      const struct nf_conntrack_tuple *tuple,
309 			      enum nf_nat_manip_type maniptype)
310 {
311 	switch (tuple->dst.protonum) {
312 	case IPPROTO_TCP:
313 		return tcp_manip_pkt(skb, iphdroff, hdroff,
314 				     tuple, maniptype);
315 	case IPPROTO_UDP:
316 		return udp_manip_pkt(skb, iphdroff, hdroff,
317 				     tuple, maniptype);
318 	case IPPROTO_UDPLITE:
319 		return udplite_manip_pkt(skb, iphdroff, hdroff,
320 					 tuple, maniptype);
321 	case IPPROTO_SCTP:
322 		return sctp_manip_pkt(skb, iphdroff, hdroff,
323 				      tuple, maniptype);
324 	case IPPROTO_ICMP:
325 		return icmp_manip_pkt(skb, iphdroff, hdroff,
326 				      tuple, maniptype);
327 	case IPPROTO_ICMPV6:
328 		return icmpv6_manip_pkt(skb, iphdroff, hdroff,
329 					tuple, maniptype);
330 	case IPPROTO_DCCP:
331 		return dccp_manip_pkt(skb, iphdroff, hdroff,
332 				      tuple, maniptype);
333 	case IPPROTO_GRE:
334 		return gre_manip_pkt(skb, iphdroff, hdroff,
335 				     tuple, maniptype);
336 	}
337 
338 	/* If we don't know protocol -- no error, pass it unmodified. */
339 	return true;
340 }
341 
342 static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
343 				  unsigned int iphdroff,
344 				  const struct nf_conntrack_tuple *target,
345 				  enum nf_nat_manip_type maniptype)
346 {
347 	struct iphdr *iph;
348 	unsigned int hdroff;
349 
350 	if (skb_ensure_writable(skb, iphdroff + sizeof(*iph)))
351 		return false;
352 
353 	iph = (void *)skb->data + iphdroff;
354 	hdroff = iphdroff + iph->ihl * 4;
355 
356 	if (!l4proto_manip_pkt(skb, iphdroff, hdroff, target, maniptype))
357 		return false;
358 	iph = (void *)skb->data + iphdroff;
359 
360 	if (maniptype == NF_NAT_MANIP_SRC) {
361 		csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
362 		iph->saddr = target->src.u3.ip;
363 	} else {
364 		csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
365 		iph->daddr = target->dst.u3.ip;
366 	}
367 	return true;
368 }
369 
370 static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
371 				  unsigned int iphdroff,
372 				  const struct nf_conntrack_tuple *target,
373 				  enum nf_nat_manip_type maniptype)
374 {
375 #if IS_ENABLED(CONFIG_IPV6)
376 	struct ipv6hdr *ipv6h;
377 	__be16 frag_off;
378 	int hdroff;
379 	u8 nexthdr;
380 
381 	if (skb_ensure_writable(skb, iphdroff + sizeof(*ipv6h)))
382 		return false;
383 
384 	ipv6h = (void *)skb->data + iphdroff;
385 	nexthdr = ipv6h->nexthdr;
386 	hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h),
387 				  &nexthdr, &frag_off);
388 	if (hdroff < 0)
389 		goto manip_addr;
390 
391 	if ((frag_off & htons(~0x7)) == 0 &&
392 	    !l4proto_manip_pkt(skb, iphdroff, hdroff, target, maniptype))
393 		return false;
394 
395 	/* must reload, offset might have changed */
396 	ipv6h = (void *)skb->data + iphdroff;
397 
398 manip_addr:
399 	if (maniptype == NF_NAT_MANIP_SRC)
400 		ipv6h->saddr = target->src.u3.in6;
401 	else
402 		ipv6h->daddr = target->dst.u3.in6;
403 
404 #endif
405 	return true;
406 }
407 
408 unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct,
409 			      enum nf_nat_manip_type mtype,
410 			      enum ip_conntrack_dir dir)
411 {
412 	struct nf_conntrack_tuple target;
413 
414 	/* We are aiming to look like inverse of other direction. */
415 	nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
416 
417 	switch (target.src.l3num) {
418 	case NFPROTO_IPV6:
419 		if (nf_nat_ipv6_manip_pkt(skb, 0, &target, mtype))
420 			return NF_ACCEPT;
421 		break;
422 	case NFPROTO_IPV4:
423 		if (nf_nat_ipv4_manip_pkt(skb, 0, &target, mtype))
424 			return NF_ACCEPT;
425 		break;
426 	default:
427 		WARN_ON_ONCE(1);
428 		break;
429 	}
430 
431 	return NF_DROP;
432 }
433 
434 static void nf_nat_ipv4_csum_update(struct sk_buff *skb,
435 				    unsigned int iphdroff, __sum16 *check,
436 				    const struct nf_conntrack_tuple *t,
437 				    enum nf_nat_manip_type maniptype)
438 {
439 	struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
440 	__be32 oldip, newip;
441 
442 	if (maniptype == NF_NAT_MANIP_SRC) {
443 		oldip = iph->saddr;
444 		newip = t->src.u3.ip;
445 	} else {
446 		oldip = iph->daddr;
447 		newip = t->dst.u3.ip;
448 	}
449 	inet_proto_csum_replace4(check, skb, oldip, newip, true);
450 }
451 
452 static void nf_nat_ipv6_csum_update(struct sk_buff *skb,
453 				    unsigned int iphdroff, __sum16 *check,
454 				    const struct nf_conntrack_tuple *t,
455 				    enum nf_nat_manip_type maniptype)
456 {
457 #if IS_ENABLED(CONFIG_IPV6)
458 	const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + iphdroff);
459 	const struct in6_addr *oldip, *newip;
460 
461 	if (maniptype == NF_NAT_MANIP_SRC) {
462 		oldip = &ipv6h->saddr;
463 		newip = &t->src.u3.in6;
464 	} else {
465 		oldip = &ipv6h->daddr;
466 		newip = &t->dst.u3.in6;
467 	}
468 	inet_proto_csum_replace16(check, skb, oldip->s6_addr32,
469 				  newip->s6_addr32, true);
470 #endif
471 }
472 
473 static void nf_csum_update(struct sk_buff *skb,
474 			   unsigned int iphdroff, __sum16 *check,
475 			   const struct nf_conntrack_tuple *t,
476 			   enum nf_nat_manip_type maniptype)
477 {
478 	switch (t->src.l3num) {
479 	case NFPROTO_IPV4:
480 		nf_nat_ipv4_csum_update(skb, iphdroff, check, t, maniptype);
481 		return;
482 	case NFPROTO_IPV6:
483 		nf_nat_ipv6_csum_update(skb, iphdroff, check, t, maniptype);
484 		return;
485 	}
486 }
487 
488 static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
489 				    u8 proto, void *data, __sum16 *check,
490 				    int datalen, int oldlen)
491 {
492 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
493 		const struct iphdr *iph = ip_hdr(skb);
494 
495 		skb->ip_summed = CHECKSUM_PARTIAL;
496 		skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
497 			ip_hdrlen(skb);
498 		skb->csum_offset = (void *)check - data;
499 		*check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, datalen,
500 					    proto, 0);
501 	} else {
502 		inet_proto_csum_replace2(check, skb,
503 					 htons(oldlen), htons(datalen), true);
504 	}
505 }
506 
507 #if IS_ENABLED(CONFIG_IPV6)
508 static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
509 				    u8 proto, void *data, __sum16 *check,
510 				    int datalen, int oldlen)
511 {
512 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
513 		const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
514 
515 		skb->ip_summed = CHECKSUM_PARTIAL;
516 		skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
517 			(data - (void *)skb->data);
518 		skb->csum_offset = (void *)check - data;
519 		*check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
520 					  datalen, proto, 0);
521 	} else {
522 		inet_proto_csum_replace2(check, skb,
523 					 htons(oldlen), htons(datalen), true);
524 	}
525 }
526 #endif
527 
528 void nf_nat_csum_recalc(struct sk_buff *skb,
529 			u8 nfproto, u8 proto, void *data, __sum16 *check,
530 			int datalen, int oldlen)
531 {
532 	switch (nfproto) {
533 	case NFPROTO_IPV4:
534 		nf_nat_ipv4_csum_recalc(skb, proto, data, check,
535 					datalen, oldlen);
536 		return;
537 #if IS_ENABLED(CONFIG_IPV6)
538 	case NFPROTO_IPV6:
539 		nf_nat_ipv6_csum_recalc(skb, proto, data, check,
540 					datalen, oldlen);
541 		return;
542 #endif
543 	}
544 
545 	WARN_ON_ONCE(1);
546 }
547 
/* Translate an ICMP packet that is related to the tracked connection @ct:
 * the IP packet embedded in the ICMP payload is mangled first, then the
 * outer IP header.
 *
 * @ctinfo is expected to be IP_CT_RELATED or IP_CT_RELATED_REPLY (a warning
 * is raised otherwise); @hooknum selects the manipulation type.
 *
 * Returns 1 when the packet was translated or needs no translation, and 0
 * on failure.
 */
int nf_nat_icmp_reply_translation(struct sk_buff *skb,
				  struct nf_conn *ct,
				  enum ip_conntrack_info ctinfo,
				  unsigned int hooknum)
{
	/* Layout right behind the outer IP header: ICMP header followed by
	 * the embedded IP header.
	 */
	struct {
		struct icmphdr	icmp;
		struct iphdr	ip;
	} *inside;
	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
	enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
	unsigned int hdrlen = ip_hdrlen(skb);
	struct nf_conntrack_tuple target;
	unsigned long statusbit;

	WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);

	if (skb_ensure_writable(skb, hdrlen + sizeof(*inside)))
		return 0;
	/* A non-zero result fails the translation. */
	if (nf_ip_checksum(skb, hooknum, hdrlen, IPPROTO_ICMP))
		return 0;

	inside = (void *)skb->data + hdrlen;
	if (inside->icmp.type == ICMP_REDIRECT) {
		/* Redirects are refused unless both NAT-done bits are set
		 * and neither NAT bit is set on the connection.
		 */
		if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
			return 0;
		if (ct->status & IPS_NAT_MASK)
			return 0;
	}

	if (manip == NF_NAT_MANIP_SRC)
		statusbit = IPS_SRC_NAT;
	else
		statusbit = IPS_DST_NAT;

	/* Invert if this is reply direction */
	if (dir == IP_CT_DIR_REPLY)
		statusbit ^= IPS_NAT_MASK;

	/* The relevant kind of NAT is not set on this connection. */
	if (!(ct->status & statusbit))
		return 1;

	/* Mangle the embedded packet with the opposite manipulation type,
	 * using the tuple of the other direction.
	 */
	if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp),
				   &ct->tuplehash[!dir].tuple, !manip))
		return 0;

	if (skb->ip_summed != CHECKSUM_PARTIAL) {
		/* Reloading "inside" here since manip_pkt may reallocate */
		inside = (void *)skb->data + hdrlen;
		/* Recompute the ICMP checksum over everything behind the
		 * outer IP header, with the checksum field zeroed first.
		 */
		inside->icmp.checksum = 0;
		inside->icmp.checksum =
			csum_fold(skb_checksum(skb, hdrlen,
					       skb->len - hdrlen, 0));
	}

	/* Change outer to look like the reply to an incoming packet */
	nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
	target.dst.protonum = IPPROTO_ICMP;
	if (!nf_nat_ipv4_manip_pkt(skb, 0, &target, manip))
		return 0;

	return 1;
}
EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
612 
613 static unsigned int
614 nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
615 	       const struct nf_hook_state *state)
616 {
617 	struct nf_conn *ct;
618 	enum ip_conntrack_info ctinfo;
619 
620 	ct = nf_ct_get(skb, &ctinfo);
621 	if (!ct)
622 		return NF_ACCEPT;
623 
624 	if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) {
625 		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
626 			if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
627 							   state->hook))
628 				return NF_DROP;
629 			else
630 				return NF_ACCEPT;
631 		}
632 	}
633 
634 	return nf_nat_inet_fn(priv, skb, state);
635 }
636 
637 static unsigned int
638 nf_nat_ipv4_in(void *priv, struct sk_buff *skb,
639 	       const struct nf_hook_state *state)
640 {
641 	unsigned int ret;
642 	__be32 daddr = ip_hdr(skb)->daddr;
643 
644 	ret = nf_nat_ipv4_fn(priv, skb, state);
645 	if (ret == NF_ACCEPT && daddr != ip_hdr(skb)->daddr)
646 		skb_dst_drop(skb);
647 
648 	return ret;
649 }
650 
/* IPv4 hook wrapper for source NAT.
 *
 * With CONFIG_XFRM, an accepted packet that is not yet XFRM-transformed and
 * whose source address (or, for non-ICMP, source port/id) differs from the
 * other direction's destination gets a new xfrm lookup through
 * nf_xfrm_me_harder(); a failure there turns the verdict into a drop
 * carrying the error.
 */
static unsigned int
nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
		const struct nf_hook_state *state)
{
#ifdef CONFIG_XFRM
	const struct nf_conntrack_tuple *cur, *other;
	const struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	enum ip_conntrack_dir dir;
	int err;
#endif
	unsigned int ret;

	ret = nf_nat_ipv4_fn(priv, skb, state);
#ifdef CONFIG_XFRM
	if (ret != NF_ACCEPT || (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED))
		return ret;

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct)
		return ret;

	dir = CTINFO2DIR(ctinfo);
	cur = &ct->tuplehash[dir].tuple;
	other = &ct->tuplehash[!dir].tuple;

	if (cur->src.u3.ip != other->dst.u3.ip ||
	    (cur->dst.protonum != IPPROTO_ICMP &&
	     cur->src.u.all != other->dst.u.all)) {
		err = nf_xfrm_me_harder(state->net, skb, AF_INET);
		if (err < 0)
			ret = NF_DROP_ERR(err);
	}
#endif
	return ret;
}
687 
688 static unsigned int
689 nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
690 		     const struct nf_hook_state *state)
691 {
692 	const struct nf_conn *ct;
693 	enum ip_conntrack_info ctinfo;
694 	unsigned int ret;
695 	int err;
696 
697 	ret = nf_nat_ipv4_fn(priv, skb, state);
698 	if (ret != NF_ACCEPT)
699 		return ret;
700 
701 	ct = nf_ct_get(skb, &ctinfo);
702 	if (ct) {
703 		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
704 
705 		if (ct->tuplehash[dir].tuple.dst.u3.ip !=
706 		    ct->tuplehash[!dir].tuple.src.u3.ip) {
707 			err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
708 			if (err < 0)
709 				ret = NF_DROP_ERR(err);
710 		}
711 #ifdef CONFIG_XFRM
712 		else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
713 			 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
714 			 ct->tuplehash[dir].tuple.dst.u.all !=
715 			 ct->tuplehash[!dir].tuple.src.u.all) {
716 			err = nf_xfrm_me_harder(state->net, skb, AF_INET);
717 			if (err < 0)
718 				ret = NF_DROP_ERR(err);
719 		}
720 #endif
721 	}
722 	return ret;
723 }
724 
725 static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
726 	/* Before packet filtering, change destination */
727 	{
728 		.hook		= nf_nat_ipv4_in,
729 		.pf		= NFPROTO_IPV4,
730 		.hooknum	= NF_INET_PRE_ROUTING,
731 		.priority	= NF_IP_PRI_NAT_DST,
732 	},
733 	/* After packet filtering, change source */
734 	{
735 		.hook		= nf_nat_ipv4_out,
736 		.pf		= NFPROTO_IPV4,
737 		.hooknum	= NF_INET_POST_ROUTING,
738 		.priority	= NF_IP_PRI_NAT_SRC,
739 	},
740 	/* Before packet filtering, change destination */
741 	{
742 		.hook		= nf_nat_ipv4_local_fn,
743 		.pf		= NFPROTO_IPV4,
744 		.hooknum	= NF_INET_LOCAL_OUT,
745 		.priority	= NF_IP_PRI_NAT_DST,
746 	},
747 	/* After packet filtering, change source */
748 	{
749 		.hook		= nf_nat_ipv4_fn,
750 		.pf		= NFPROTO_IPV4,
751 		.hooknum	= NF_INET_LOCAL_IN,
752 		.priority	= NF_IP_PRI_NAT_SRC,
753 	},
754 };
755 
756 int nf_nat_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops)
757 {
758 	return nf_nat_register_fn(net, ops->pf, ops, nf_nat_ipv4_ops,
759 				  ARRAY_SIZE(nf_nat_ipv4_ops));
760 }
761 EXPORT_SYMBOL_GPL(nf_nat_ipv4_register_fn);
762 
763 void nf_nat_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
764 {
765 	nf_nat_unregister_fn(net, ops->pf, ops, ARRAY_SIZE(nf_nat_ipv4_ops));
766 }
767 EXPORT_SYMBOL_GPL(nf_nat_ipv4_unregister_fn);
768 
769 #if IS_ENABLED(CONFIG_IPV6)
/* Translate an ICMPv6 packet that is related to the tracked connection @ct:
 * the IPv6 packet embedded in the ICMPv6 payload is mangled first, then the
 * outer IPv6 header.
 *
 * @ctinfo is expected to be IP_CT_RELATED or IP_CT_RELATED_REPLY (a warning
 * is raised otherwise); @hooknum selects the manipulation type and @hdrlen
 * is the offset of the ICMPv6 header within @skb.
 *
 * Returns 1 when the packet was translated or needs no translation, and 0
 * on failure.
 */
int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
				    struct nf_conn *ct,
				    enum ip_conntrack_info ctinfo,
				    unsigned int hooknum,
				    unsigned int hdrlen)
{
	/* Layout at @hdrlen: ICMPv6 header followed by the embedded IPv6
	 * header.
	 */
	struct {
		struct icmp6hdr	icmp6;
		struct ipv6hdr	ip6;
	} *inside;
	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
	enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
	struct nf_conntrack_tuple target;
	unsigned long statusbit;

	WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);

	if (skb_ensure_writable(skb, hdrlen + sizeof(*inside)))
		return 0;
	/* A non-zero result fails the translation. */
	if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6))
		return 0;

	inside = (void *)skb->data + hdrlen;
	if (inside->icmp6.icmp6_type == NDISC_REDIRECT) {
		/* Redirects are refused unless both NAT-done bits are set
		 * and neither NAT bit is set on the connection.
		 */
		if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
			return 0;
		if (ct->status & IPS_NAT_MASK)
			return 0;
	}

	if (manip == NF_NAT_MANIP_SRC)
		statusbit = IPS_SRC_NAT;
	else
		statusbit = IPS_DST_NAT;

	/* Invert if this is reply direction */
	if (dir == IP_CT_DIR_REPLY)
		statusbit ^= IPS_NAT_MASK;

	/* The relevant kind of NAT is not set on this connection. */
	if (!(ct->status & statusbit))
		return 1;

	/* Mangle the embedded packet with the opposite manipulation type,
	 * using the tuple of the other direction.
	 */
	if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6),
				   &ct->tuplehash[!dir].tuple, !manip))
		return 0;

	if (skb->ip_summed != CHECKSUM_PARTIAL) {
		struct ipv6hdr *ipv6h = ipv6_hdr(skb);

		/* Take "inside" again after the embedded packet was mangled,
		 * then recompute the ICMPv6 checksum (pseudo-header included)
		 * over everything from @hdrlen on, with the checksum field
		 * zeroed first.
		 */
		inside = (void *)skb->data + hdrlen;
		inside->icmp6.icmp6_cksum = 0;
		inside->icmp6.icmp6_cksum =
			csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
					skb->len - hdrlen, IPPROTO_ICMPV6,
					skb_checksum(skb, hdrlen,
						     skb->len - hdrlen, 0));
	}

	/* Mangle the outer header using the inverted tuple of the other
	 * direction, forced to protocol ICMPv6.
	 */
	nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
	target.dst.protonum = IPPROTO_ICMPV6;
	if (!nf_nat_ipv6_manip_pkt(skb, 0, &target, manip))
		return 0;

	return 1;
}
EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation);
836 
837 static unsigned int
838 nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
839 	       const struct nf_hook_state *state)
840 {
841 	struct nf_conn *ct;
842 	enum ip_conntrack_info ctinfo;
843 	__be16 frag_off;
844 	int hdrlen;
845 	u8 nexthdr;
846 
847 	ct = nf_ct_get(skb, &ctinfo);
848 	/* Can't track?  It's not due to stress, or conntrack would
849 	 * have dropped it.  Hence it's the user's responsibilty to
850 	 * packet filter it out, or implement conntrack/NAT for that
851 	 * protocol. 8) --RR
852 	 */
853 	if (!ct)
854 		return NF_ACCEPT;
855 
856 	if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) {
857 		nexthdr = ipv6_hdr(skb)->nexthdr;
858 		hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
859 					  &nexthdr, &frag_off);
860 
861 		if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
862 			if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
863 							     state->hook,
864 							     hdrlen))
865 				return NF_DROP;
866 			else
867 				return NF_ACCEPT;
868 		}
869 	}
870 
871 	return nf_nat_inet_fn(priv, skb, state);
872 }
873 
874 static unsigned int
875 nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
876 	       const struct nf_hook_state *state)
877 {
878 	unsigned int ret;
879 	struct in6_addr daddr = ipv6_hdr(skb)->daddr;
880 
881 	ret = nf_nat_ipv6_fn(priv, skb, state);
882 	if (ret != NF_DROP && ret != NF_STOLEN &&
883 	    ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
884 		skb_dst_drop(skb);
885 
886 	return ret;
887 }
888 
/* IPv6 hook wrapper for source NAT.
 *
 * With CONFIG_XFRM, an accepted packet that is not yet XFRM-transformed and
 * whose source address (or, for non-ICMPv6, source port/id) differs from
 * the other direction's destination gets a new xfrm lookup through
 * nf_xfrm_me_harder(); a failure there turns the verdict into a drop
 * carrying the error.
 */
static unsigned int
nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
		const struct nf_hook_state *state)
{
#ifdef CONFIG_XFRM
	const struct nf_conntrack_tuple *cur, *other;
	const struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	enum ip_conntrack_dir dir;
	int err;
#endif
	unsigned int ret;

	ret = nf_nat_ipv6_fn(priv, skb, state);
#ifdef CONFIG_XFRM
	if (ret != NF_ACCEPT || (IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED))
		return ret;

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct)
		return ret;

	dir = CTINFO2DIR(ctinfo);
	cur = &ct->tuplehash[dir].tuple;
	other = &ct->tuplehash[!dir].tuple;

	if (!nf_inet_addr_cmp(&cur->src.u3, &other->dst.u3) ||
	    (cur->dst.protonum != IPPROTO_ICMPV6 &&
	     cur->src.u.all != other->dst.u.all)) {
		err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
		if (err < 0)
			ret = NF_DROP_ERR(err);
	}
#endif

	return ret;
}
925 
926 static unsigned int
927 nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
928 		     const struct nf_hook_state *state)
929 {
930 	const struct nf_conn *ct;
931 	enum ip_conntrack_info ctinfo;
932 	unsigned int ret;
933 	int err;
934 
935 	ret = nf_nat_ipv6_fn(priv, skb, state);
936 	if (ret != NF_ACCEPT)
937 		return ret;
938 
939 	ct = nf_ct_get(skb, &ctinfo);
940 	if (ct) {
941 		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
942 
943 		if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
944 				      &ct->tuplehash[!dir].tuple.src.u3)) {
945 			err = nf_ip6_route_me_harder(state->net, skb);
946 			if (err < 0)
947 				ret = NF_DROP_ERR(err);
948 		}
949 #ifdef CONFIG_XFRM
950 		else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
951 			 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
952 			 ct->tuplehash[dir].tuple.dst.u.all !=
953 			 ct->tuplehash[!dir].tuple.src.u.all) {
954 			err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
955 			if (err < 0)
956 				ret = NF_DROP_ERR(err);
957 		}
958 #endif
959 	}
960 
961 	return ret;
962 }
963 
964 static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
965 	/* Before packet filtering, change destination */
966 	{
967 		.hook		= nf_nat_ipv6_in,
968 		.pf		= NFPROTO_IPV6,
969 		.hooknum	= NF_INET_PRE_ROUTING,
970 		.priority	= NF_IP6_PRI_NAT_DST,
971 	},
972 	/* After packet filtering, change source */
973 	{
974 		.hook		= nf_nat_ipv6_out,
975 		.pf		= NFPROTO_IPV6,
976 		.hooknum	= NF_INET_POST_ROUTING,
977 		.priority	= NF_IP6_PRI_NAT_SRC,
978 	},
979 	/* Before packet filtering, change destination */
980 	{
981 		.hook		= nf_nat_ipv6_local_fn,
982 		.pf		= NFPROTO_IPV6,
983 		.hooknum	= NF_INET_LOCAL_OUT,
984 		.priority	= NF_IP6_PRI_NAT_DST,
985 	},
986 	/* After packet filtering, change source */
987 	{
988 		.hook		= nf_nat_ipv6_fn,
989 		.pf		= NFPROTO_IPV6,
990 		.hooknum	= NF_INET_LOCAL_IN,
991 		.priority	= NF_IP6_PRI_NAT_SRC,
992 	},
993 };
994 
995 int nf_nat_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops)
996 {
997 	return nf_nat_register_fn(net, ops->pf, ops, nf_nat_ipv6_ops,
998 				  ARRAY_SIZE(nf_nat_ipv6_ops));
999 }
1000 EXPORT_SYMBOL_GPL(nf_nat_ipv6_register_fn);
1001 
1002 void nf_nat_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
1003 {
1004 	nf_nat_unregister_fn(net, ops->pf, ops, ARRAY_SIZE(nf_nat_ipv6_ops));
1005 }
1006 EXPORT_SYMBOL_GPL(nf_nat_ipv6_unregister_fn);
1007 #endif /* CONFIG_IPV6 */
1008 
1009 #if defined(CONFIG_NF_TABLES_INET) && IS_ENABLED(CONFIG_NFT_NAT)
1010 int nf_nat_inet_register_fn(struct net *net, const struct nf_hook_ops *ops)
1011 {
1012 	int ret;
1013 
1014 	if (WARN_ON_ONCE(ops->pf != NFPROTO_INET))
1015 		return -EINVAL;
1016 
1017 	ret = nf_nat_register_fn(net, NFPROTO_IPV6, ops, nf_nat_ipv6_ops,
1018 				 ARRAY_SIZE(nf_nat_ipv6_ops));
1019 	if (ret)
1020 		return ret;
1021 
1022 	ret = nf_nat_register_fn(net, NFPROTO_IPV4, ops, nf_nat_ipv4_ops,
1023 				 ARRAY_SIZE(nf_nat_ipv4_ops));
1024 	if (ret)
1025 		nf_nat_ipv6_unregister_fn(net, ops);
1026 
1027 	return ret;
1028 }
1029 EXPORT_SYMBOL_GPL(nf_nat_inet_register_fn);
1030 
1031 void nf_nat_inet_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
1032 {
1033 	nf_nat_unregister_fn(net, NFPROTO_IPV4, ops, ARRAY_SIZE(nf_nat_ipv4_ops));
1034 	nf_nat_unregister_fn(net, NFPROTO_IPV6, ops, ARRAY_SIZE(nf_nat_ipv6_ops));
1035 }
1036 EXPORT_SYMBOL_GPL(nf_nat_inet_unregister_fn);
1037 #endif /* NFT INET NAT */
1038