xref: /openbmc/linux/net/netfilter/nf_nat_proto.c (revision b4faef17)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* (C) 1999-2001 Paul `Rusty' Russell
3  * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
4  */
5 
6 #include <linux/types.h>
7 #include <linux/export.h>
8 #include <linux/init.h>
9 #include <linux/udp.h>
10 #include <linux/tcp.h>
11 #include <linux/icmp.h>
12 #include <linux/icmpv6.h>
13 
14 #include <linux/dccp.h>
15 #include <linux/sctp.h>
16 #include <net/sctp/checksum.h>
17 
18 #include <linux/netfilter.h>
19 #include <net/netfilter/nf_nat.h>
20 
21 #include <linux/ipv6.h>
22 #include <linux/netfilter_ipv6.h>
23 #include <net/checksum.h>
24 #include <net/ip6_checksum.h>
25 #include <net/ip6_route.h>
26 #include <net/xfrm.h>
27 #include <net/ipv6.h>
28 
29 #include <net/netfilter/nf_conntrack_core.h>
30 #include <net/netfilter/nf_conntrack.h>
31 #include <linux/netfilter/nfnetlink_conntrack.h>
32 
33 static void nf_csum_update(struct sk_buff *skb,
34 			   unsigned int iphdroff, __sum16 *check,
35 			   const struct nf_conntrack_tuple *t,
36 			   enum nf_nat_manip_type maniptype);
37 
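/* Rewrite the UDP source or destination port selected by @maniptype.
 * When @do_csum is set the checksum is updated as well, mangling a
 * zero result to CSUM_MANGLED_0 ("no checksum" is valid for UDP).
 */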
38 static void
39 __udp_manip_pkt(struct sk_buff *skb,
40 	        unsigned int iphdroff, struct udphdr *hdr,
41 	        const struct nf_conntrack_tuple *tuple,
42 	        enum nf_nat_manip_type maniptype, bool do_csum)
43 {
44 	__be16 *portptr, newport;
45 
46 	if (maniptype == NF_NAT_MANIP_SRC) {
47 		/* Get rid of src port */
48 		newport = tuple->src.u.udp.port;
49 		portptr = &hdr->source;
50 	} else {
51 		/* Get rid of dst port */
52 		newport = tuple->dst.u.udp.port;
53 		portptr = &hdr->dest;
54 	}
55 	if (do_csum) {
56 		nf_csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
57 		inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
58 					 false);
59 		if (!hdr->check)
60 			hdr->check = CSUM_MANGLED_0;
61 	}
62 	*portptr = newport;
63 }
64 
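/* UDP: skip the checksum update when the datagram carries no checksum
 * and no checksum offload is pending.
 */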
65 static bool udp_manip_pkt(struct sk_buff *skb,
66 			  unsigned int iphdroff, unsigned int hdroff,
67 			  const struct nf_conntrack_tuple *tuple,
68 			  enum nf_nat_manip_type maniptype)
69 {
70 	struct udphdr *hdr;
71 	bool do_csum;
72 
73 	if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
74 		return false;
75 
76 	hdr = (struct udphdr *)(skb->data + hdroff);
77 	do_csum = hdr->check || skb->ip_summed == CHECKSUM_PARTIAL;
78 
79 	__udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, do_csum);
80 	return true;
81 }
82 
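/* UDP-Lite: the checksum is mandatory, so it is always updated. */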
83 static bool udplite_manip_pkt(struct sk_buff *skb,
84 			      unsigned int iphdroff, unsigned int hdroff,
85 			      const struct nf_conntrack_tuple *tuple,
86 			      enum nf_nat_manip_type maniptype)
87 {
88 #ifdef CONFIG_NF_CT_PROTO_UDPLITE
89 	struct udphdr *hdr;
90 
91 	if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
92 		return false;
93 
94 	hdr = (struct udphdr *)(skb->data + hdroff);
95 	__udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, true);
96 #endif
97 	return true;
98 }
99 
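/* SCTP: rewrite the port and recompute the CRC32c checksum over the
 * packet, unless only a truncated (ICMP-embedded) header is available
 * or checksum offload is still pending.
 */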
100 static bool
101 sctp_manip_pkt(struct sk_buff *skb,
102 	       unsigned int iphdroff, unsigned int hdroff,
103 	       const struct nf_conntrack_tuple *tuple,
104 	       enum nf_nat_manip_type maniptype)
105 {
106 #ifdef CONFIG_NF_CT_PROTO_SCTP
107 	struct sctphdr *hdr;
108 	int hdrsize = 8;
109 
110 	/* This could be an inner header returned in an ICMP error packet; in
111 	 * such cases we cannot update the checksum field since it is outside
112 	 * of the 8 bytes of transport layer headers we are guaranteed.
113 	 */
114 	if (skb->len >= hdroff + sizeof(*hdr))
115 		hdrsize = sizeof(*hdr);
116 
117 	if (skb_ensure_writable(skb, hdroff + hdrsize))
118 		return false;
119 
120 	hdr = (struct sctphdr *)(skb->data + hdroff);
121 
122 	if (maniptype == NF_NAT_MANIP_SRC) {
123 		/* Get rid of src port */
124 		hdr->source = tuple->src.u.sctp.port;
125 	} else {
126 		/* Get rid of dst port */
127 		hdr->dest = tuple->dst.u.sctp.port;
128 	}
129 
130 	if (hdrsize < sizeof(*hdr))
131 		return true;
132 
133 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
134 		hdr->checksum = sctp_compute_cksum(skb, hdroff);
135 		skb->ip_summed = CHECKSUM_NONE;
136 	}
137 
138 #endif
139 	return true;
140 }
141 
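/* TCP: rewrite the port and incrementally fix up the checksum; for a
 * truncated header inside an ICMP error only the port is changed.
 */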
142 static bool
143 tcp_manip_pkt(struct sk_buff *skb,
144 	      unsigned int iphdroff, unsigned int hdroff,
145 	      const struct nf_conntrack_tuple *tuple,
146 	      enum nf_nat_manip_type maniptype)
147 {
148 	struct tcphdr *hdr;
149 	__be16 *portptr, newport, oldport;
150 	int hdrsize = 8; /* TCP connection tracking guarantees this much */
151 
152 	/* this could be an inner header returned in an ICMP packet; in such
153 	   cases we cannot update the checksum field since it is outside of
154 	   the 8 bytes of transport layer headers we are guaranteed */
155 	if (skb->len >= hdroff + sizeof(struct tcphdr))
156 		hdrsize = sizeof(struct tcphdr);
157 
158 	if (skb_ensure_writable(skb, hdroff + hdrsize))
159 		return false;
160 
161 	hdr = (struct tcphdr *)(skb->data + hdroff);
162 
163 	if (maniptype == NF_NAT_MANIP_SRC) {
164 		/* Get rid of src port */
165 		newport = tuple->src.u.tcp.port;
166 		portptr = &hdr->source;
167 	} else {
168 		/* Get rid of dst port */
169 		newport = tuple->dst.u.tcp.port;
170 		portptr = &hdr->dest;
171 	}
172 
173 	oldport = *portptr;
174 	*portptr = newport;
175 
176 	if (hdrsize < sizeof(*hdr))
177 		return true;
178 
179 	nf_csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
180 	inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, false);
181 	return true;
182 }
183 
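/* DCCP: same approach as TCP -- rewrite the port, then fix up the
 * checksum unless only the 8 guaranteed header bytes are present.
 */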
184 static bool
185 dccp_manip_pkt(struct sk_buff *skb,
186 	       unsigned int iphdroff, unsigned int hdroff,
187 	       const struct nf_conntrack_tuple *tuple,
188 	       enum nf_nat_manip_type maniptype)
189 {
190 #ifdef CONFIG_NF_CT_PROTO_DCCP
191 	struct dccp_hdr *hdr;
192 	__be16 *portptr, oldport, newport;
193 	int hdrsize = 8; /* DCCP connection tracking guarantees this much */
194 
195 	if (skb->len >= hdroff + sizeof(struct dccp_hdr))
196 		hdrsize = sizeof(struct dccp_hdr);
197 
198 	if (skb_ensure_writable(skb, hdroff + hdrsize))
199 		return false;
200 
201 	hdr = (struct dccp_hdr *)(skb->data + hdroff);
202 
203 	if (maniptype == NF_NAT_MANIP_SRC) {
204 		newport = tuple->src.u.dccp.port;
205 		portptr = &hdr->dccph_sport;
206 	} else {
207 		newport = tuple->dst.u.dccp.port;
208 		portptr = &hdr->dccph_dport;
209 	}
210 
211 	oldport = *portptr;
212 	*portptr = newport;
213 
214 	if (hdrsize < sizeof(*hdr))
215 		return true;
216 
217 	nf_csum_update(skb, iphdroff, &hdr->dccph_checksum, tuple, maniptype);
218 	inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport,
219 				 false);
220 #endif
221 	return true;
222 }
223 
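/* ICMP: only query/reply messages carry an identifier that can be
 * remapped; all other types pass through unmodified.
 */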
224 static bool
225 icmp_manip_pkt(struct sk_buff *skb,
226 	       unsigned int iphdroff, unsigned int hdroff,
227 	       const struct nf_conntrack_tuple *tuple,
228 	       enum nf_nat_manip_type maniptype)
229 {
230 	struct icmphdr *hdr;
231 
232 	if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
233 		return false;
234 
235 	hdr = (struct icmphdr *)(skb->data + hdroff);
236 	switch (hdr->type) {
237 	case ICMP_ECHO:
238 	case ICMP_ECHOREPLY:
239 	case ICMP_TIMESTAMP:
240 	case ICMP_TIMESTAMPREPLY:
241 	case ICMP_INFO_REQUEST:
242 	case ICMP_INFO_REPLY:
243 	case ICMP_ADDRESS:
244 	case ICMP_ADDRESSREPLY:
245 		break;
246 	default:
247 		return true;
248 	}
249 	inet_proto_csum_replace2(&hdr->checksum, skb,
250 				 hdr->un.echo.id, tuple->src.u.icmp.id, false);
251 	hdr->un.echo.id = tuple->src.u.icmp.id;
252 	return true;
253 }
254 
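/* ICMPv6: the checksum covers the IPv6 pseudo-header and is therefore
 * always updated; the identifier is remapped for echo request/reply.
 */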
255 static bool
256 icmpv6_manip_pkt(struct sk_buff *skb,
257 		 unsigned int iphdroff, unsigned int hdroff,
258 		 const struct nf_conntrack_tuple *tuple,
259 		 enum nf_nat_manip_type maniptype)
260 {
261 	struct icmp6hdr *hdr;
262 
263 	if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
264 		return false;
265 
266 	hdr = (struct icmp6hdr *)(skb->data + hdroff);
267 	nf_csum_update(skb, iphdroff, &hdr->icmp6_cksum, tuple, maniptype);
268 	if (hdr->icmp6_type == ICMPV6_ECHO_REQUEST ||
269 	    hdr->icmp6_type == ICMPV6_ECHO_REPLY) {
270 		inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
271 					 hdr->icmp6_identifier,
272 					 tuple->src.u.icmp.id, false);
273 		hdr->icmp6_identifier = tuple->src.u.icmp.id;
274 	}
275 	return true;
276 }
277 
278 /* manipulate a GRE packet according to maniptype */
279 static bool
280 gre_manip_pkt(struct sk_buff *skb,
281 	      unsigned int iphdroff, unsigned int hdroff,
282 	      const struct nf_conntrack_tuple *tuple,
283 	      enum nf_nat_manip_type maniptype)
284 {
285 #if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE)
286 	const struct gre_base_hdr *greh;
287 	struct pptp_gre_header *pgreh;
288 
289 	/* pgreh includes two optional 32-bit fields which are not required
290 	 * to be there.  That's where the magic '8' comes from */
291 	if (skb_ensure_writable(skb, hdroff + sizeof(*pgreh) - 8))
292 		return false;
293 
294 	greh = (void *)skb->data + hdroff;
295 	pgreh = (struct pptp_gre_header *)greh;
296 
297 	/* we can only manipulate the destination, since the 'source key'
298 	 * is not present in the packet itself */
299 	if (maniptype != NF_NAT_MANIP_DST)
300 		return true;
301 
302 	switch (greh->flags & GRE_VERSION) {
303 	case GRE_VERSION_0:
304 		/* We do not currently NAT any GREv0 packets.
305 		 * Try to behave like "nf_nat_proto_unknown" */
306 		break;
307 	case GRE_VERSION_1:
308 		pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key));
309 		pgreh->call_id = tuple->dst.u.gre.key;
310 		break;
311 	default:
312 		pr_debug("can't nat unknown GRE version\n");
313 		return false;
314 	}
315 #endif
316 	return true;
317 }
318 
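/* Dispatch to the transport protocol handler; unknown protocols are
 * passed through unmodified.
 */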
319 static bool l4proto_manip_pkt(struct sk_buff *skb,
320 			      unsigned int iphdroff, unsigned int hdroff,
321 			      const struct nf_conntrack_tuple *tuple,
322 			      enum nf_nat_manip_type maniptype)
323 {
324 	switch (tuple->dst.protonum) {
325 	case IPPROTO_TCP:
326 		return tcp_manip_pkt(skb, iphdroff, hdroff,
327 				     tuple, maniptype);
328 	case IPPROTO_UDP:
329 		return udp_manip_pkt(skb, iphdroff, hdroff,
330 				     tuple, maniptype);
331 	case IPPROTO_UDPLITE:
332 		return udplite_manip_pkt(skb, iphdroff, hdroff,
333 					 tuple, maniptype);
334 	case IPPROTO_SCTP:
335 		return sctp_manip_pkt(skb, iphdroff, hdroff,
336 				      tuple, maniptype);
337 	case IPPROTO_ICMP:
338 		return icmp_manip_pkt(skb, iphdroff, hdroff,
339 				      tuple, maniptype);
340 	case IPPROTO_ICMPV6:
341 		return icmpv6_manip_pkt(skb, iphdroff, hdroff,
342 					tuple, maniptype);
343 	case IPPROTO_DCCP:
344 		return dccp_manip_pkt(skb, iphdroff, hdroff,
345 				      tuple, maniptype);
346 	case IPPROTO_GRE:
347 		return gre_manip_pkt(skb, iphdroff, hdroff,
348 				     tuple, maniptype);
349 	}
350 
351 	/* If we don't know protocol -- no error, pass it unmodified. */
352 	return true;
353 }
354 
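/* Rewrite the transport header first, then the IPv4 address and the
 * IP header checksum.
 */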
355 static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
356 				  unsigned int iphdroff,
357 				  const struct nf_conntrack_tuple *target,
358 				  enum nf_nat_manip_type maniptype)
359 {
360 	struct iphdr *iph;
361 	unsigned int hdroff;
362 
363 	if (skb_ensure_writable(skb, iphdroff + sizeof(*iph)))
364 		return false;
365 
366 	iph = (void *)skb->data + iphdroff;
367 	hdroff = iphdroff + iph->ihl * 4;
368 
369 	if (!l4proto_manip_pkt(skb, iphdroff, hdroff, target, maniptype))
370 		return false;
371 	iph = (void *)skb->data + iphdroff;
372 
373 	if (maniptype == NF_NAT_MANIP_SRC) {
374 		csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
375 		iph->saddr = target->src.u3.ip;
376 	} else {
377 		csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
378 		iph->daddr = target->dst.u3.ip;
379 	}
380 	return true;
381 }
382 
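/* IPv6 has no header checksum; rewrite the transport header (skipped
 * for non-first fragments or if the extension header walk fails) and
 * then the address.
 */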
383 static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
384 				  unsigned int iphdroff,
385 				  const struct nf_conntrack_tuple *target,
386 				  enum nf_nat_manip_type maniptype)
387 {
388 #if IS_ENABLED(CONFIG_IPV6)
389 	struct ipv6hdr *ipv6h;
390 	__be16 frag_off;
391 	int hdroff;
392 	u8 nexthdr;
393 
394 	if (skb_ensure_writable(skb, iphdroff + sizeof(*ipv6h)))
395 		return false;
396 
397 	ipv6h = (void *)skb->data + iphdroff;
398 	nexthdr = ipv6h->nexthdr;
399 	hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h),
400 				  &nexthdr, &frag_off);
401 	if (hdroff < 0)
402 		goto manip_addr;
403 
404 	if ((frag_off & htons(~0x7)) == 0 &&
405 	    !l4proto_manip_pkt(skb, iphdroff, hdroff, target, maniptype))
406 		return false;
407 
408 	/* must reload: skb->data may have been reallocated above */
409 	ipv6h = (void *)skb->data + iphdroff;
410 
411 manip_addr:
412 	if (maniptype == NF_NAT_MANIP_SRC)
413 		ipv6h->saddr = target->src.u3.in6;
414 	else
415 		ipv6h->daddr = target->dst.u3.in6;
416 
417 #endif
418 	return true;
419 }
420 
421 unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct,
422 			      enum nf_nat_manip_type mtype,
423 			      enum ip_conntrack_dir dir)
424 {
425 	struct nf_conntrack_tuple target;
426 
427 	/* We are aiming to look like the inverse of the other direction. */
428 	nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
429 
430 	switch (target.src.l3num) {
431 	case NFPROTO_IPV6:
432 		if (nf_nat_ipv6_manip_pkt(skb, 0, &target, mtype))
433 			return NF_ACCEPT;
434 		break;
435 	case NFPROTO_IPV4:
436 		if (nf_nat_ipv4_manip_pkt(skb, 0, &target, mtype))
437 			return NF_ACCEPT;
438 		break;
439 	default:
440 		WARN_ON_ONCE(1);
441 		break;
442 	}
443 
444 	return NF_DROP;
445 }
446 
447 static void nf_nat_ipv4_csum_update(struct sk_buff *skb,
448 				    unsigned int iphdroff, __sum16 *check,
449 				    const struct nf_conntrack_tuple *t,
450 				    enum nf_nat_manip_type maniptype)
451 {
452 	struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
453 	__be32 oldip, newip;
454 
455 	if (maniptype == NF_NAT_MANIP_SRC) {
456 		oldip = iph->saddr;
457 		newip = t->src.u3.ip;
458 	} else {
459 		oldip = iph->daddr;
460 		newip = t->dst.u3.ip;
461 	}
462 	inet_proto_csum_replace4(check, skb, oldip, newip, true);
463 }
464 
465 static void nf_nat_ipv6_csum_update(struct sk_buff *skb,
466 				    unsigned int iphdroff, __sum16 *check,
467 				    const struct nf_conntrack_tuple *t,
468 				    enum nf_nat_manip_type maniptype)
469 {
470 #if IS_ENABLED(CONFIG_IPV6)
471 	const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + iphdroff);
472 	const struct in6_addr *oldip, *newip;
473 
474 	if (maniptype == NF_NAT_MANIP_SRC) {
475 		oldip = &ipv6h->saddr;
476 		newip = &t->src.u3.in6;
477 	} else {
478 		oldip = &ipv6h->daddr;
479 		newip = &t->dst.u3.in6;
480 	}
481 	inet_proto_csum_replace16(check, skb, oldip->s6_addr32,
482 				  newip->s6_addr32, true);
483 #endif
484 }
485 
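/* Fold the address change described by @t into a transport checksum
 * (pseudo-header adjustment), dispatching on the L3 protocol.
 */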
486 static void nf_csum_update(struct sk_buff *skb,
487 			   unsigned int iphdroff, __sum16 *check,
488 			   const struct nf_conntrack_tuple *t,
489 			   enum nf_nat_manip_type maniptype)
490 {
491 	switch (t->src.l3num) {
492 	case NFPROTO_IPV4:
493 		nf_nat_ipv4_csum_update(skb, iphdroff, check, t, maniptype);
494 		return;
495 	case NFPROTO_IPV6:
496 		nf_nat_ipv6_csum_update(skb, iphdroff, check, t, maniptype);
497 		return;
498 	}
499 }
500 
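/* After the payload length changed: either switch the skb to
 * CHECKSUM_PARTIAL with a fresh pseudo-header checksum, or adjust the
 * existing one for the new length.
 */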
501 static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
502 				    u8 proto, void *data, __sum16 *check,
503 				    int datalen, int oldlen)
504 {
505 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
506 		const struct iphdr *iph = ip_hdr(skb);
507 
508 		skb->ip_summed = CHECKSUM_PARTIAL;
509 		skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
510 			ip_hdrlen(skb);
511 		skb->csum_offset = (void *)check - data;
512 		*check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, datalen,
513 					    proto, 0);
514 	} else {
515 		inet_proto_csum_replace2(check, skb,
516 					 htons(oldlen), htons(datalen), true);
517 	}
518 }
519 
520 #if IS_ENABLED(CONFIG_IPV6)
521 static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
522 				    u8 proto, void *data, __sum16 *check,
523 				    int datalen, int oldlen)
524 {
525 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
526 		const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
527 
528 		skb->ip_summed = CHECKSUM_PARTIAL;
529 		skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
530 			(data - (void *)skb->data);
531 		skb->csum_offset = (void *)check - data;
532 		*check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
533 					  datalen, proto, 0);
534 	} else {
535 		inet_proto_csum_replace2(check, skb,
536 					 htons(oldlen), htons(datalen), true);
537 	}
538 }
539 #endif
540 
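/* Recalculate a transport checksum after the payload length changed
 * (oldlen -> datalen), dispatching on the L3 protocol.
 */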
541 void nf_nat_csum_recalc(struct sk_buff *skb,
542 			u8 nfproto, u8 proto, void *data, __sum16 *check,
543 			int datalen, int oldlen)
544 {
545 	switch (nfproto) {
546 	case NFPROTO_IPV4:
547 		nf_nat_ipv4_csum_recalc(skb, proto, data, check,
548 					datalen, oldlen);
549 		return;
550 #if IS_ENABLED(CONFIG_IPV6)
551 	case NFPROTO_IPV6:
552 		nf_nat_ipv6_csum_recalc(skb, proto, data, check,
553 					datalen, oldlen);
554 		return;
555 #endif
556 	}
557 
558 	WARN_ON_ONCE(1);
559 }
560 
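/* Translate an ICMP error: apply the reverse manipulation to the
 * embedded packet, refresh the inner checksum, then NAT the outer
 * IPv4 header so the error matches the reply direction.
 */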
561 int nf_nat_icmp_reply_translation(struct sk_buff *skb,
562 				  struct nf_conn *ct,
563 				  enum ip_conntrack_info ctinfo,
564 				  unsigned int hooknum)
565 {
566 	struct {
567 		struct icmphdr	icmp;
568 		struct iphdr	ip;
569 	} *inside;
570 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
571 	enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
572 	unsigned int hdrlen = ip_hdrlen(skb);
573 	struct nf_conntrack_tuple target;
574 	unsigned long statusbit;
575 
576 	WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
577 
578 	if (skb_ensure_writable(skb, hdrlen + sizeof(*inside)))
579 		return 0;
580 	if (nf_ip_checksum(skb, hooknum, hdrlen, IPPROTO_ICMP))
581 		return 0;
582 
583 	inside = (void *)skb->data + hdrlen;
584 	if (inside->icmp.type == ICMP_REDIRECT) {
585 		if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
586 			return 0;
587 		if (ct->status & IPS_NAT_MASK)
588 			return 0;
589 	}
590 
591 	if (manip == NF_NAT_MANIP_SRC)
592 		statusbit = IPS_SRC_NAT;
593 	else
594 		statusbit = IPS_DST_NAT;
595 
596 	/* Invert if this is reply direction */
597 	if (dir == IP_CT_DIR_REPLY)
598 		statusbit ^= IPS_NAT_MASK;
599 
600 	if (!(ct->status & statusbit))
601 		return 1;
602 
603 	if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp),
604 				   &ct->tuplehash[!dir].tuple, !manip))
605 		return 0;
606 
607 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
608 		/* Reloading "inside" here since manip_pkt may reallocate */
609 		inside = (void *)skb->data + hdrlen;
610 		inside->icmp.checksum = 0;
611 		inside->icmp.checksum =
612 			csum_fold(skb_checksum(skb, hdrlen,
613 					       skb->len - hdrlen, 0));
614 	}
615 
616 	/* Change outer to look like the reply to an incoming packet */
617 	nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
618 	target.dst.protonum = IPPROTO_ICMP;
619 	if (!nf_nat_ipv4_manip_pkt(skb, 0, &target, manip))
620 		return 0;
621 
622 	return 1;
623 }
624 EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
625 
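/* Common IPv4 NAT hook: related ICMP errors go through the reply
 * translator, everything else through the generic inet NAT handler.
 */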
626 static unsigned int
627 nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
628 	       const struct nf_hook_state *state)
629 {
630 	struct nf_conn *ct;
631 	enum ip_conntrack_info ctinfo;
632 
633 	ct = nf_ct_get(skb, &ctinfo);
634 	if (!ct)
635 		return NF_ACCEPT;
636 
637 	if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) {
638 		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
639 			if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
640 							   state->hook))
641 				return NF_DROP;
642 			else
643 				return NF_ACCEPT;
644 		}
645 	}
646 
647 	return nf_nat_inet_fn(priv, skb, state);
648 }
649 
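/* PRE_ROUTING: drop any cached route if DNAT changed the destination
 * address.
 */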
650 static unsigned int
651 nf_nat_ipv4_in(void *priv, struct sk_buff *skb,
652 	       const struct nf_hook_state *state)
653 {
654 	unsigned int ret;
655 	__be32 daddr = ip_hdr(skb)->daddr;
656 
657 	ret = nf_nat_ipv4_fn(priv, skb, state);
658 	if (ret == NF_ACCEPT && daddr != ip_hdr(skb)->daddr)
659 		skb_dst_drop(skb);
660 
661 	return ret;
662 }
663 
664 static unsigned int
665 nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
666 		const struct nf_hook_state *state)
667 {
668 #ifdef CONFIG_XFRM
669 	const struct nf_conn *ct;
670 	enum ip_conntrack_info ctinfo;
671 	int err;
672 #endif
673 	unsigned int ret;
674 
675 	ret = nf_nat_ipv4_fn(priv, skb, state);
676 #ifdef CONFIG_XFRM
677 	if (ret != NF_ACCEPT)
678 		return ret;
679 
680 	if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED)
681 		return ret;
682 
683 	ct = nf_ct_get(skb, &ctinfo);
684 	if (ct) {
685 		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
686 
687 		if (ct->tuplehash[dir].tuple.src.u3.ip !=
688 		     ct->tuplehash[!dir].tuple.dst.u3.ip ||
689 		    (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
690 		     ct->tuplehash[dir].tuple.src.u.all !=
691 		     ct->tuplehash[!dir].tuple.dst.u.all)) {
692 			err = nf_xfrm_me_harder(state->net, skb, AF_INET);
693 			if (err < 0)
694 				ret = NF_DROP_ERR(err);
695 		}
696 	}
697 #endif
698 	return ret;
699 }
700 
701 static unsigned int
702 nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
703 		     const struct nf_hook_state *state)
704 {
705 	const struct nf_conn *ct;
706 	enum ip_conntrack_info ctinfo;
707 	unsigned int ret;
708 	int err;
709 
710 	ret = nf_nat_ipv4_fn(priv, skb, state);
711 	if (ret != NF_ACCEPT)
712 		return ret;
713 
714 	ct = nf_ct_get(skb, &ctinfo);
715 	if (ct) {
716 		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
717 
718 		if (ct->tuplehash[dir].tuple.dst.u3.ip !=
719 		    ct->tuplehash[!dir].tuple.src.u3.ip) {
720 			err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
721 			if (err < 0)
722 				ret = NF_DROP_ERR(err);
723 		}
724 #ifdef CONFIG_XFRM
725 		else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
726 			 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
727 			 ct->tuplehash[dir].tuple.dst.u.all !=
728 			 ct->tuplehash[!dir].tuple.src.u.all) {
729 			err = nf_xfrm_me_harder(state->net, skb, AF_INET);
730 			if (err < 0)
731 				ret = NF_DROP_ERR(err);
732 		}
733 #endif
734 	}
735 	return ret;
736 }
737 
738 static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
739 	/* Before packet filtering, change destination */
740 	{
741 		.hook		= nf_nat_ipv4_in,
742 		.pf		= NFPROTO_IPV4,
743 		.hooknum	= NF_INET_PRE_ROUTING,
744 		.priority	= NF_IP_PRI_NAT_DST,
745 	},
746 	/* After packet filtering, change source */
747 	{
748 		.hook		= nf_nat_ipv4_out,
749 		.pf		= NFPROTO_IPV4,
750 		.hooknum	= NF_INET_POST_ROUTING,
751 		.priority	= NF_IP_PRI_NAT_SRC,
752 	},
753 	/* Before packet filtering, change destination */
754 	{
755 		.hook		= nf_nat_ipv4_local_fn,
756 		.pf		= NFPROTO_IPV4,
757 		.hooknum	= NF_INET_LOCAL_OUT,
758 		.priority	= NF_IP_PRI_NAT_DST,
759 	},
760 	/* After packet filtering, change source */
761 	{
762 		.hook		= nf_nat_ipv4_fn,
763 		.pf		= NFPROTO_IPV4,
764 		.hooknum	= NF_INET_LOCAL_IN,
765 		.priority	= NF_IP_PRI_NAT_SRC,
766 	},
767 };
768 
769 int nf_nat_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops)
770 {
771 	return nf_nat_register_fn(net, ops->pf, ops, nf_nat_ipv4_ops,
772 				  ARRAY_SIZE(nf_nat_ipv4_ops));
773 }
774 EXPORT_SYMBOL_GPL(nf_nat_ipv4_register_fn);
775 
776 void nf_nat_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
777 {
778 	nf_nat_unregister_fn(net, ops->pf, ops, ARRAY_SIZE(nf_nat_ipv4_ops));
779 }
780 EXPORT_SYMBOL_GPL(nf_nat_ipv4_unregister_fn);
781 
782 #if IS_ENABLED(CONFIG_IPV6)
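/* IPv6 counterpart of nf_nat_icmp_reply_translation(): translate the
 * embedded packet, recompute the ICMPv6 checksum and NAT the outer
 * header.
 */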
783 int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
784 				    struct nf_conn *ct,
785 				    enum ip_conntrack_info ctinfo,
786 				    unsigned int hooknum,
787 				    unsigned int hdrlen)
788 {
789 	struct {
790 		struct icmp6hdr	icmp6;
791 		struct ipv6hdr	ip6;
792 	} *inside;
793 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
794 	enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
795 	struct nf_conntrack_tuple target;
796 	unsigned long statusbit;
797 
798 	WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
799 
800 	if (skb_ensure_writable(skb, hdrlen + sizeof(*inside)))
801 		return 0;
802 	if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6))
803 		return 0;
804 
805 	inside = (void *)skb->data + hdrlen;
806 	if (inside->icmp6.icmp6_type == NDISC_REDIRECT) {
807 		if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
808 			return 0;
809 		if (ct->status & IPS_NAT_MASK)
810 			return 0;
811 	}
812 
813 	if (manip == NF_NAT_MANIP_SRC)
814 		statusbit = IPS_SRC_NAT;
815 	else
816 		statusbit = IPS_DST_NAT;
817 
818 	/* Invert if this is reply direction */
819 	if (dir == IP_CT_DIR_REPLY)
820 		statusbit ^= IPS_NAT_MASK;
821 
822 	if (!(ct->status & statusbit))
823 		return 1;
824 
825 	if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6),
826 				   &ct->tuplehash[!dir].tuple, !manip))
827 		return 0;
828 
829 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
830 		struct ipv6hdr *ipv6h = ipv6_hdr(skb);
831 
832 		inside = (void *)skb->data + hdrlen;
833 		inside->icmp6.icmp6_cksum = 0;
834 		inside->icmp6.icmp6_cksum =
835 			csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
836 					skb->len - hdrlen, IPPROTO_ICMPV6,
837 					skb_checksum(skb, hdrlen,
838 						     skb->len - hdrlen, 0));
839 	}
840 
841 	nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
842 	target.dst.protonum = IPPROTO_ICMPV6;
843 	if (!nf_nat_ipv6_manip_pkt(skb, 0, &target, manip))
844 		return 0;
845 
846 	return 1;
847 }
848 EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation);
849 
850 static unsigned int
851 nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
852 	       const struct nf_hook_state *state)
853 {
854 	struct nf_conn *ct;
855 	enum ip_conntrack_info ctinfo;
856 	__be16 frag_off;
857 	int hdrlen;
858 	u8 nexthdr;
859 
860 	ct = nf_ct_get(skb, &ctinfo);
861 	/* Can't track?  It's not due to stress, or conntrack would
862 	 * have dropped it.  Hence it's the user's responsibility to
863 	 * packet filter it out, or implement conntrack/NAT for that
864 	 * protocol. 8) --RR
865 	 */
866 	if (!ct)
867 		return NF_ACCEPT;
868 
869 	if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) {
870 		nexthdr = ipv6_hdr(skb)->nexthdr;
871 		hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
872 					  &nexthdr, &frag_off);
873 
874 		if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
875 			if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
876 							     state->hook,
877 							     hdrlen))
878 				return NF_DROP;
879 			else
880 				return NF_ACCEPT;
881 		}
882 	}
883 
884 	return nf_nat_inet_fn(priv, skb, state);
885 }
886 
887 static unsigned int
888 nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
889 	       const struct nf_hook_state *state)
890 {
891 	unsigned int ret;
892 	struct in6_addr daddr = ipv6_hdr(skb)->daddr;
893 
894 	ret = nf_nat_ipv6_fn(priv, skb, state);
895 	if (ret != NF_DROP && ret != NF_STOLEN &&
896 	    ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
897 		skb_dst_drop(skb);
898 
899 	return ret;
900 }
901 
902 static unsigned int
903 nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
904 		const struct nf_hook_state *state)
905 {
906 #ifdef CONFIG_XFRM
907 	const struct nf_conn *ct;
908 	enum ip_conntrack_info ctinfo;
909 	int err;
910 #endif
911 	unsigned int ret;
912 
913 	ret = nf_nat_ipv6_fn(priv, skb, state);
914 #ifdef CONFIG_XFRM
915 	if (ret != NF_ACCEPT)
916 		return ret;
917 
918 	if (IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED)
919 		return ret;
920 	ct = nf_ct_get(skb, &ctinfo);
921 	if (ct) {
922 		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
923 
924 		if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
925 				      &ct->tuplehash[!dir].tuple.dst.u3) ||
926 		    (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
927 		     ct->tuplehash[dir].tuple.src.u.all !=
928 		     ct->tuplehash[!dir].tuple.dst.u.all)) {
929 			err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
930 			if (err < 0)
931 				ret = NF_DROP_ERR(err);
932 		}
933 	}
934 #endif
935 
936 	return ret;
937 }
938 
939 static unsigned int
940 nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
941 		     const struct nf_hook_state *state)
942 {
943 	const struct nf_conn *ct;
944 	enum ip_conntrack_info ctinfo;
945 	unsigned int ret;
946 	int err;
947 
948 	ret = nf_nat_ipv6_fn(priv, skb, state);
949 	if (ret != NF_ACCEPT)
950 		return ret;
951 
952 	ct = nf_ct_get(skb, &ctinfo);
953 	if (ct) {
954 		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
955 
956 		if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
957 				      &ct->tuplehash[!dir].tuple.src.u3)) {
958 			err = nf_ip6_route_me_harder(state->net, skb);
959 			if (err < 0)
960 				ret = NF_DROP_ERR(err);
961 		}
962 #ifdef CONFIG_XFRM
963 		else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
964 			 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
965 			 ct->tuplehash[dir].tuple.dst.u.all !=
966 			 ct->tuplehash[!dir].tuple.src.u.all) {
967 			err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
968 			if (err < 0)
969 				ret = NF_DROP_ERR(err);
970 		}
971 #endif
972 	}
973 
974 	return ret;
975 }
976 
977 static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
978 	/* Before packet filtering, change destination */
979 	{
980 		.hook		= nf_nat_ipv6_in,
981 		.pf		= NFPROTO_IPV6,
982 		.hooknum	= NF_INET_PRE_ROUTING,
983 		.priority	= NF_IP6_PRI_NAT_DST,
984 	},
985 	/* After packet filtering, change source */
986 	{
987 		.hook		= nf_nat_ipv6_out,
988 		.pf		= NFPROTO_IPV6,
989 		.hooknum	= NF_INET_POST_ROUTING,
990 		.priority	= NF_IP6_PRI_NAT_SRC,
991 	},
992 	/* Before packet filtering, change destination */
993 	{
994 		.hook		= nf_nat_ipv6_local_fn,
995 		.pf		= NFPROTO_IPV6,
996 		.hooknum	= NF_INET_LOCAL_OUT,
997 		.priority	= NF_IP6_PRI_NAT_DST,
998 	},
999 	/* After packet filtering, change source */
1000 	{
1001 		.hook		= nf_nat_ipv6_fn,
1002 		.pf		= NFPROTO_IPV6,
1003 		.hooknum	= NF_INET_LOCAL_IN,
1004 		.priority	= NF_IP6_PRI_NAT_SRC,
1005 	},
1006 };
1007 
1008 int nf_nat_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops)
1009 {
1010 	return nf_nat_register_fn(net, ops->pf, ops, nf_nat_ipv6_ops,
1011 				  ARRAY_SIZE(nf_nat_ipv6_ops));
1012 }
1013 EXPORT_SYMBOL_GPL(nf_nat_ipv6_register_fn);
1014 
1015 void nf_nat_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
1016 {
1017 	nf_nat_unregister_fn(net, ops->pf, ops, ARRAY_SIZE(nf_nat_ipv6_ops));
1018 }
1019 EXPORT_SYMBOL_GPL(nf_nat_ipv6_unregister_fn);
1020 #endif /* CONFIG_IPV6 */
1021 
1022 #if defined(CONFIG_NF_TABLES_INET) && IS_ENABLED(CONFIG_NFT_NAT)
1023 int nf_nat_inet_register_fn(struct net *net, const struct nf_hook_ops *ops)
1024 {
1025 	int ret;
1026 
1027 	if (WARN_ON_ONCE(ops->pf != NFPROTO_INET))
1028 		return -EINVAL;
1029 
1030 	ret = nf_nat_register_fn(net, NFPROTO_IPV6, ops, nf_nat_ipv6_ops,
1031 				 ARRAY_SIZE(nf_nat_ipv6_ops));
1032 	if (ret)
1033 		return ret;
1034 
1035 	ret = nf_nat_register_fn(net, NFPROTO_IPV4, ops, nf_nat_ipv4_ops,
1036 				 ARRAY_SIZE(nf_nat_ipv4_ops));
1037 	if (ret)
1038 		nf_nat_unregister_fn(net, NFPROTO_IPV6, ops,
1039 					ARRAY_SIZE(nf_nat_ipv6_ops));
1040 	return ret;
1041 }
1042 EXPORT_SYMBOL_GPL(nf_nat_inet_register_fn);
1043 
1044 void nf_nat_inet_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
1045 {
1046 	nf_nat_unregister_fn(net, NFPROTO_IPV4, ops, ARRAY_SIZE(nf_nat_ipv4_ops));
1047 	nf_nat_unregister_fn(net, NFPROTO_IPV6, ops, ARRAY_SIZE(nf_nat_ipv6_ops));
1048 }
1049 EXPORT_SYMBOL_GPL(nf_nat_inet_unregister_fn);
1050 #endif /* NFT INET NAT */
1051