xref: /openbmc/linux/net/netfilter/nf_nat_proto.c (revision dac3fe72)
1 /* (C) 1999-2001 Paul `Rusty' Russell
2  * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  */
8 
9 #include <linux/types.h>
10 #include <linux/export.h>
11 #include <linux/init.h>
12 #include <linux/udp.h>
13 #include <linux/tcp.h>
14 #include <linux/icmp.h>
15 #include <linux/icmpv6.h>
16 
17 #include <linux/dccp.h>
18 #include <linux/sctp.h>
19 #include <net/sctp/checksum.h>
20 
21 #include <linux/netfilter.h>
22 #include <net/netfilter/nf_nat.h>
23 #include <net/netfilter/nf_nat_core.h>
24 #include <net/netfilter/nf_nat_l3proto.h>
25 
26 #include <linux/ipv6.h>
27 #include <linux/netfilter_ipv6.h>
28 #include <net/checksum.h>
29 #include <net/ip6_checksum.h>
30 #include <net/ip6_route.h>
31 #include <net/xfrm.h>
32 #include <net/ipv6.h>
33 
34 #include <net/netfilter/nf_conntrack_core.h>
35 #include <net/netfilter/nf_conntrack.h>
36 #include <linux/netfilter/nfnetlink_conntrack.h>
37 
/* Forward declarations of the per-family descriptors; both are defined
 * further down in this file.
 */
static const struct nf_nat_l3proto nf_nat_l3proto_ipv4;
#if IS_ENABLED(CONFIG_IPV6)
static const struct nf_nat_l3proto nf_nat_l3proto_ipv6;
#endif

/* Forward declaration: the l4 manip helpers below call this before its
 * definition, which dispatches on the tuple's l3num.
 */
static void nf_csum_update(struct sk_buff *skb,
			   unsigned int iphdroff, __sum16 *check,
			   const struct nf_conntrack_tuple *t,
			   enum nf_nat_manip_type maniptype);
47 
48 static void
49 __udp_manip_pkt(struct sk_buff *skb,
50 	        unsigned int iphdroff, struct udphdr *hdr,
51 	        const struct nf_conntrack_tuple *tuple,
52 	        enum nf_nat_manip_type maniptype, bool do_csum)
53 {
54 	__be16 *portptr, newport;
55 
56 	if (maniptype == NF_NAT_MANIP_SRC) {
57 		/* Get rid of src port */
58 		newport = tuple->src.u.udp.port;
59 		portptr = &hdr->source;
60 	} else {
61 		/* Get rid of dst port */
62 		newport = tuple->dst.u.udp.port;
63 		portptr = &hdr->dest;
64 	}
65 	if (do_csum) {
66 		nf_csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
67 		inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
68 					 false);
69 		if (!hdr->check)
70 			hdr->check = CSUM_MANGLED_0;
71 	}
72 	*portptr = newport;
73 }
74 
75 static bool udp_manip_pkt(struct sk_buff *skb,
76 			  unsigned int iphdroff, unsigned int hdroff,
77 			  const struct nf_conntrack_tuple *tuple,
78 			  enum nf_nat_manip_type maniptype)
79 {
80 	struct udphdr *hdr;
81 	bool do_csum;
82 
83 	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
84 		return false;
85 
86 	hdr = (struct udphdr *)(skb->data + hdroff);
87 	do_csum = hdr->check || skb->ip_summed == CHECKSUM_PARTIAL;
88 
89 	__udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, do_csum);
90 	return true;
91 }
92 
93 static bool udplite_manip_pkt(struct sk_buff *skb,
94 			      unsigned int iphdroff, unsigned int hdroff,
95 			      const struct nf_conntrack_tuple *tuple,
96 			      enum nf_nat_manip_type maniptype)
97 {
98 #ifdef CONFIG_NF_CT_PROTO_UDPLITE
99 	struct udphdr *hdr;
100 
101 	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
102 		return false;
103 
104 	hdr = (struct udphdr *)(skb->data + hdroff);
105 	__udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, true);
106 #endif
107 	return true;
108 }
109 
110 static bool
111 sctp_manip_pkt(struct sk_buff *skb,
112 	       unsigned int iphdroff, unsigned int hdroff,
113 	       const struct nf_conntrack_tuple *tuple,
114 	       enum nf_nat_manip_type maniptype)
115 {
116 #ifdef CONFIG_NF_CT_PROTO_SCTP
117 	struct sctphdr *hdr;
118 	int hdrsize = 8;
119 
120 	/* This could be an inner header returned in imcp packet; in such
121 	 * cases we cannot update the checksum field since it is outside
122 	 * of the 8 bytes of transport layer headers we are guaranteed.
123 	 */
124 	if (skb->len >= hdroff + sizeof(*hdr))
125 		hdrsize = sizeof(*hdr);
126 
127 	if (!skb_make_writable(skb, hdroff + hdrsize))
128 		return false;
129 
130 	hdr = (struct sctphdr *)(skb->data + hdroff);
131 
132 	if (maniptype == NF_NAT_MANIP_SRC) {
133 		/* Get rid of src port */
134 		hdr->source = tuple->src.u.sctp.port;
135 	} else {
136 		/* Get rid of dst port */
137 		hdr->dest = tuple->dst.u.sctp.port;
138 	}
139 
140 	if (hdrsize < sizeof(*hdr))
141 		return true;
142 
143 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
144 		hdr->checksum = sctp_compute_cksum(skb, hdroff);
145 		skb->ip_summed = CHECKSUM_NONE;
146 	}
147 
148 #endif
149 	return true;
150 }
151 
152 static bool
153 tcp_manip_pkt(struct sk_buff *skb,
154 	      unsigned int iphdroff, unsigned int hdroff,
155 	      const struct nf_conntrack_tuple *tuple,
156 	      enum nf_nat_manip_type maniptype)
157 {
158 	struct tcphdr *hdr;
159 	__be16 *portptr, newport, oldport;
160 	int hdrsize = 8; /* TCP connection tracking guarantees this much */
161 
162 	/* this could be a inner header returned in icmp packet; in such
163 	   cases we cannot update the checksum field since it is outside of
164 	   the 8 bytes of transport layer headers we are guaranteed */
165 	if (skb->len >= hdroff + sizeof(struct tcphdr))
166 		hdrsize = sizeof(struct tcphdr);
167 
168 	if (!skb_make_writable(skb, hdroff + hdrsize))
169 		return false;
170 
171 	hdr = (struct tcphdr *)(skb->data + hdroff);
172 
173 	if (maniptype == NF_NAT_MANIP_SRC) {
174 		/* Get rid of src port */
175 		newport = tuple->src.u.tcp.port;
176 		portptr = &hdr->source;
177 	} else {
178 		/* Get rid of dst port */
179 		newport = tuple->dst.u.tcp.port;
180 		portptr = &hdr->dest;
181 	}
182 
183 	oldport = *portptr;
184 	*portptr = newport;
185 
186 	if (hdrsize < sizeof(*hdr))
187 		return true;
188 
189 	nf_csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
190 	inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, false);
191 	return true;
192 }
193 
194 static bool
195 dccp_manip_pkt(struct sk_buff *skb,
196 	       unsigned int iphdroff, unsigned int hdroff,
197 	       const struct nf_conntrack_tuple *tuple,
198 	       enum nf_nat_manip_type maniptype)
199 {
200 #ifdef CONFIG_NF_CT_PROTO_DCCP
201 	struct dccp_hdr *hdr;
202 	__be16 *portptr, oldport, newport;
203 	int hdrsize = 8; /* DCCP connection tracking guarantees this much */
204 
205 	if (skb->len >= hdroff + sizeof(struct dccp_hdr))
206 		hdrsize = sizeof(struct dccp_hdr);
207 
208 	if (!skb_make_writable(skb, hdroff + hdrsize))
209 		return false;
210 
211 	hdr = (struct dccp_hdr *)(skb->data + hdroff);
212 
213 	if (maniptype == NF_NAT_MANIP_SRC) {
214 		newport = tuple->src.u.dccp.port;
215 		portptr = &hdr->dccph_sport;
216 	} else {
217 		newport = tuple->dst.u.dccp.port;
218 		portptr = &hdr->dccph_dport;
219 	}
220 
221 	oldport = *portptr;
222 	*portptr = newport;
223 
224 	if (hdrsize < sizeof(*hdr))
225 		return true;
226 
227 	nf_csum_update(skb, iphdroff, &hdr->dccph_checksum, tuple, maniptype);
228 	inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport,
229 				 false);
230 #endif
231 	return true;
232 }
233 
234 static bool
235 icmp_manip_pkt(struct sk_buff *skb,
236 	       unsigned int iphdroff, unsigned int hdroff,
237 	       const struct nf_conntrack_tuple *tuple,
238 	       enum nf_nat_manip_type maniptype)
239 {
240 	struct icmphdr *hdr;
241 
242 	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
243 		return false;
244 
245 	hdr = (struct icmphdr *)(skb->data + hdroff);
246 	inet_proto_csum_replace2(&hdr->checksum, skb,
247 				 hdr->un.echo.id, tuple->src.u.icmp.id, false);
248 	hdr->un.echo.id = tuple->src.u.icmp.id;
249 	return true;
250 }
251 
252 static bool
253 icmpv6_manip_pkt(struct sk_buff *skb,
254 		 unsigned int iphdroff, unsigned int hdroff,
255 		 const struct nf_conntrack_tuple *tuple,
256 		 enum nf_nat_manip_type maniptype)
257 {
258 	struct icmp6hdr *hdr;
259 
260 	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
261 		return false;
262 
263 	hdr = (struct icmp6hdr *)(skb->data + hdroff);
264 	nf_csum_update(skb, iphdroff, &hdr->icmp6_cksum, tuple, maniptype);
265 	if (hdr->icmp6_type == ICMPV6_ECHO_REQUEST ||
266 	    hdr->icmp6_type == ICMPV6_ECHO_REPLY) {
267 		inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
268 					 hdr->icmp6_identifier,
269 					 tuple->src.u.icmp.id, false);
270 		hdr->icmp6_identifier = tuple->src.u.icmp.id;
271 	}
272 	return true;
273 }
274 
275 /* manipulate a GRE packet according to maniptype */
276 static bool
277 gre_manip_pkt(struct sk_buff *skb,
278 	      unsigned int iphdroff, unsigned int hdroff,
279 	      const struct nf_conntrack_tuple *tuple,
280 	      enum nf_nat_manip_type maniptype)
281 {
282 #if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE)
283 	const struct gre_base_hdr *greh;
284 	struct pptp_gre_header *pgreh;
285 
286 	/* pgreh includes two optional 32bit fields which are not required
287 	 * to be there.  That's where the magic '8' comes from */
288 	if (!skb_make_writable(skb, hdroff + sizeof(*pgreh) - 8))
289 		return false;
290 
291 	greh = (void *)skb->data + hdroff;
292 	pgreh = (struct pptp_gre_header *)greh;
293 
294 	/* we only have destination manip of a packet, since 'source key'
295 	 * is not present in the packet itself */
296 	if (maniptype != NF_NAT_MANIP_DST)
297 		return true;
298 
299 	switch (greh->flags & GRE_VERSION) {
300 	case GRE_VERSION_0:
301 		/* We do not currently NAT any GREv0 packets.
302 		 * Try to behave like "nf_nat_proto_unknown" */
303 		break;
304 	case GRE_VERSION_1:
305 		pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key));
306 		pgreh->call_id = tuple->dst.u.gre.key;
307 		break;
308 	default:
309 		pr_debug("can't nat unknown GRE version\n");
310 		return false;
311 	}
312 #endif
313 	return true;
314 }
315 
316 static bool l4proto_manip_pkt(struct sk_buff *skb,
317 			      unsigned int iphdroff, unsigned int hdroff,
318 			      const struct nf_conntrack_tuple *tuple,
319 			      enum nf_nat_manip_type maniptype)
320 {
321 	switch (tuple->dst.protonum) {
322 	case IPPROTO_TCP:
323 		return tcp_manip_pkt(skb, iphdroff, hdroff,
324 				     tuple, maniptype);
325 	case IPPROTO_UDP:
326 		return udp_manip_pkt(skb, iphdroff, hdroff,
327 				     tuple, maniptype);
328 	case IPPROTO_UDPLITE:
329 		return udplite_manip_pkt(skb, iphdroff, hdroff,
330 					 tuple, maniptype);
331 	case IPPROTO_SCTP:
332 		return sctp_manip_pkt(skb, iphdroff, hdroff,
333 				      tuple, maniptype);
334 	case IPPROTO_ICMP:
335 		return icmp_manip_pkt(skb, iphdroff, hdroff,
336 				      tuple, maniptype);
337 	case IPPROTO_ICMPV6:
338 		return icmpv6_manip_pkt(skb, iphdroff, hdroff,
339 					tuple, maniptype);
340 	case IPPROTO_DCCP:
341 		return dccp_manip_pkt(skb, iphdroff, hdroff,
342 				      tuple, maniptype);
343 	case IPPROTO_GRE:
344 		return gre_manip_pkt(skb, iphdroff, hdroff,
345 				     tuple, maniptype);
346 	}
347 
348 	/* If we don't know protocol -- no error, pass it unmodified. */
349 	return true;
350 }
351 
352 static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
353 				  unsigned int iphdroff,
354 				  const struct nf_conntrack_tuple *target,
355 				  enum nf_nat_manip_type maniptype)
356 {
357 	struct iphdr *iph;
358 	unsigned int hdroff;
359 
360 	if (!skb_make_writable(skb, iphdroff + sizeof(*iph)))
361 		return false;
362 
363 	iph = (void *)skb->data + iphdroff;
364 	hdroff = iphdroff + iph->ihl * 4;
365 
366 	if (!l4proto_manip_pkt(skb, iphdroff, hdroff, target, maniptype))
367 		return false;
368 	iph = (void *)skb->data + iphdroff;
369 
370 	if (maniptype == NF_NAT_MANIP_SRC) {
371 		csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
372 		iph->saddr = target->src.u3.ip;
373 	} else {
374 		csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
375 		iph->daddr = target->dst.u3.ip;
376 	}
377 	return true;
378 }
379 
380 static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
381 				  unsigned int iphdroff,
382 				  const struct nf_conntrack_tuple *target,
383 				  enum nf_nat_manip_type maniptype)
384 {
385 #if IS_ENABLED(CONFIG_IPV6)
386 	struct ipv6hdr *ipv6h;
387 	__be16 frag_off;
388 	int hdroff;
389 	u8 nexthdr;
390 
391 	if (!skb_make_writable(skb, iphdroff + sizeof(*ipv6h)))
392 		return false;
393 
394 	ipv6h = (void *)skb->data + iphdroff;
395 	nexthdr = ipv6h->nexthdr;
396 	hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h),
397 				  &nexthdr, &frag_off);
398 	if (hdroff < 0)
399 		goto manip_addr;
400 
401 	if ((frag_off & htons(~0x7)) == 0 &&
402 	    !l4proto_manip_pkt(skb, iphdroff, hdroff, target, maniptype))
403 		return false;
404 
405 	/* must reload, offset might have changed */
406 	ipv6h = (void *)skb->data + iphdroff;
407 
408 manip_addr:
409 	if (maniptype == NF_NAT_MANIP_SRC)
410 		ipv6h->saddr = target->src.u3.in6;
411 	else
412 		ipv6h->daddr = target->dst.u3.in6;
413 
414 #endif
415 	return true;
416 }
417 
418 unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct,
419 			      enum nf_nat_manip_type mtype,
420 			      enum ip_conntrack_dir dir)
421 {
422 	struct nf_conntrack_tuple target;
423 
424 	/* We are aiming to look like inverse of other direction. */
425 	nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
426 
427 	switch (target.src.l3num) {
428 	case NFPROTO_IPV6:
429 		if (nf_nat_ipv6_manip_pkt(skb, 0, &target, mtype))
430 			return NF_ACCEPT;
431 		break;
432 	case NFPROTO_IPV4:
433 		if (nf_nat_ipv4_manip_pkt(skb, 0, &target, mtype))
434 			return NF_ACCEPT;
435 		break;
436 	default:
437 		WARN_ON_ONCE(1);
438 		break;
439 	}
440 
441 	return NF_DROP;
442 }
443 
444 static void nf_nat_ipv4_csum_update(struct sk_buff *skb,
445 				    unsigned int iphdroff, __sum16 *check,
446 				    const struct nf_conntrack_tuple *t,
447 				    enum nf_nat_manip_type maniptype)
448 {
449 	struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
450 	__be32 oldip, newip;
451 
452 	if (maniptype == NF_NAT_MANIP_SRC) {
453 		oldip = iph->saddr;
454 		newip = t->src.u3.ip;
455 	} else {
456 		oldip = iph->daddr;
457 		newip = t->dst.u3.ip;
458 	}
459 	inet_proto_csum_replace4(check, skb, oldip, newip, true);
460 }
461 
462 static void nf_nat_ipv6_csum_update(struct sk_buff *skb,
463 				    unsigned int iphdroff, __sum16 *check,
464 				    const struct nf_conntrack_tuple *t,
465 				    enum nf_nat_manip_type maniptype)
466 {
467 #if IS_ENABLED(CONFIG_IPV6)
468 	const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + iphdroff);
469 	const struct in6_addr *oldip, *newip;
470 
471 	if (maniptype == NF_NAT_MANIP_SRC) {
472 		oldip = &ipv6h->saddr;
473 		newip = &t->src.u3.in6;
474 	} else {
475 		oldip = &ipv6h->daddr;
476 		newip = &t->dst.u3.in6;
477 	}
478 	inet_proto_csum_replace16(check, skb, oldip->s6_addr32,
479 				  newip->s6_addr32, true);
480 #endif
481 }
482 
483 static void nf_csum_update(struct sk_buff *skb,
484 			   unsigned int iphdroff, __sum16 *check,
485 			   const struct nf_conntrack_tuple *t,
486 			   enum nf_nat_manip_type maniptype)
487 {
488 	switch (t->src.l3num) {
489 	case NFPROTO_IPV4:
490 		nf_nat_ipv4_csum_update(skb, iphdroff, check, t, maniptype);
491 		return;
492 	case NFPROTO_IPV6:
493 		nf_nat_ipv6_csum_update(skb, iphdroff, check, t, maniptype);
494 		return;
495 	}
496 }
497 
498 static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
499 				    u8 proto, void *data, __sum16 *check,
500 				    int datalen, int oldlen)
501 {
502 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
503 		const struct iphdr *iph = ip_hdr(skb);
504 
505 		skb->ip_summed = CHECKSUM_PARTIAL;
506 		skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
507 			ip_hdrlen(skb);
508 		skb->csum_offset = (void *)check - data;
509 		*check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, datalen,
510 					    proto, 0);
511 	} else {
512 		inet_proto_csum_replace2(check, skb,
513 					 htons(oldlen), htons(datalen), true);
514 	}
515 }
516 
517 #if IS_ENABLED(CONFIG_IPV6)
518 static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
519 				    u8 proto, void *data, __sum16 *check,
520 				    int datalen, int oldlen)
521 {
522 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
523 		const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
524 
525 		skb->ip_summed = CHECKSUM_PARTIAL;
526 		skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
527 			(data - (void *)skb->data);
528 		skb->csum_offset = (void *)check - data;
529 		*check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
530 					  datalen, proto, 0);
531 	} else {
532 		inet_proto_csum_replace2(check, skb,
533 					 htons(oldlen), htons(datalen), true);
534 	}
535 }
536 #endif
537 
538 void nf_nat_csum_recalc(struct sk_buff *skb,
539 			u8 nfproto, u8 proto, void *data, __sum16 *check,
540 			int datalen, int oldlen)
541 {
542 	switch (nfproto) {
543 	case NFPROTO_IPV4:
544 		nf_nat_ipv4_csum_recalc(skb, proto, data, check,
545 					datalen, oldlen);
546 		return;
547 #if IS_ENABLED(CONFIG_IPV6)
548 	case NFPROTO_IPV6:
549 		nf_nat_ipv6_csum_recalc(skb, proto, data, check,
550 					datalen, oldlen);
551 		return;
552 #endif
553 	}
554 
555 	WARN_ON_ONCE(1);
556 }
557 
/* IPv4 descriptor handed to nf_nat_l3proto_register()/unregister(); only
 * the l3proto number is filled in.
 */
static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = {
	.l3proto		= NFPROTO_IPV4,
};
561 
562 int nf_nat_icmp_reply_translation(struct sk_buff *skb,
563 				  struct nf_conn *ct,
564 				  enum ip_conntrack_info ctinfo,
565 				  unsigned int hooknum)
566 {
567 	struct {
568 		struct icmphdr	icmp;
569 		struct iphdr	ip;
570 	} *inside;
571 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
572 	enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
573 	unsigned int hdrlen = ip_hdrlen(skb);
574 	struct nf_conntrack_tuple target;
575 	unsigned long statusbit;
576 
577 	WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
578 
579 	if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
580 		return 0;
581 	if (nf_ip_checksum(skb, hooknum, hdrlen, 0))
582 		return 0;
583 
584 	inside = (void *)skb->data + hdrlen;
585 	if (inside->icmp.type == ICMP_REDIRECT) {
586 		if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
587 			return 0;
588 		if (ct->status & IPS_NAT_MASK)
589 			return 0;
590 	}
591 
592 	if (manip == NF_NAT_MANIP_SRC)
593 		statusbit = IPS_SRC_NAT;
594 	else
595 		statusbit = IPS_DST_NAT;
596 
597 	/* Invert if this is reply direction */
598 	if (dir == IP_CT_DIR_REPLY)
599 		statusbit ^= IPS_NAT_MASK;
600 
601 	if (!(ct->status & statusbit))
602 		return 1;
603 
604 	if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp),
605 				   &ct->tuplehash[!dir].tuple, !manip))
606 		return 0;
607 
608 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
609 		/* Reloading "inside" here since manip_pkt may reallocate */
610 		inside = (void *)skb->data + hdrlen;
611 		inside->icmp.checksum = 0;
612 		inside->icmp.checksum =
613 			csum_fold(skb_checksum(skb, hdrlen,
614 					       skb->len - hdrlen, 0));
615 	}
616 
617 	/* Change outer to look like the reply to an incoming packet */
618 	nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
619 	target.dst.protonum = IPPROTO_ICMP;
620 	if (!nf_nat_ipv4_manip_pkt(skb, 0, &target, manip))
621 		return 0;
622 
623 	return 1;
624 }
625 EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
626 
627 static unsigned int
628 nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
629 	       const struct nf_hook_state *state)
630 {
631 	struct nf_conn *ct;
632 	enum ip_conntrack_info ctinfo;
633 
634 	ct = nf_ct_get(skb, &ctinfo);
635 	if (!ct)
636 		return NF_ACCEPT;
637 
638 	if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) {
639 		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
640 			if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
641 							   state->hook))
642 				return NF_DROP;
643 			else
644 				return NF_ACCEPT;
645 		}
646 	}
647 
648 	return nf_nat_inet_fn(priv, skb, state);
649 }
650 
651 static unsigned int
652 nf_nat_ipv4_in(void *priv, struct sk_buff *skb,
653 	       const struct nf_hook_state *state)
654 {
655 	unsigned int ret;
656 	__be32 daddr = ip_hdr(skb)->daddr;
657 
658 	ret = nf_nat_ipv4_fn(priv, skb, state);
659 	if (ret == NF_ACCEPT && daddr != ip_hdr(skb)->daddr)
660 		skb_dst_drop(skb);
661 
662 	return ret;
663 }
664 
/*
 * POST_ROUTING hook: run the common NAT body.
 *
 * With CONFIG_XFRM, an accepted packet that has not yet been xfrm
 * transformed is passed to nf_xfrm_me_harder() when the source address,
 * or for non-ICMP the source port/id, differs between the packet's
 * direction tuple and the other direction's destination.  A failure
 * there turns the verdict into NF_DROP_ERR(err).
 */
static unsigned int
nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
		const struct nf_hook_state *state)
{
#ifdef CONFIG_XFRM
	const struct nf_conntrack_tuple *cur, *rev;
	const struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	enum ip_conntrack_dir dir;
	int err;
#endif
	unsigned int ret;

	ret = nf_nat_ipv4_fn(priv, skb, state);
#ifdef CONFIG_XFRM
	if (ret != NF_ACCEPT)
		return ret;

	if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED)
		return ret;

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct)
		return ret;

	dir = CTINFO2DIR(ctinfo);
	cur = &ct->tuplehash[dir].tuple;
	rev = &ct->tuplehash[!dir].tuple;

	if (cur->src.u3.ip != rev->dst.u3.ip ||
	    (cur->dst.protonum != IPPROTO_ICMP &&
	     cur->src.u.all != rev->dst.u.all)) {
		err = nf_xfrm_me_harder(state->net, skb, AF_INET);
		if (err < 0)
			ret = NF_DROP_ERR(err);
	}
#endif
	return ret;
}
701 
702 static unsigned int
703 nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
704 		     const struct nf_hook_state *state)
705 {
706 	const struct nf_conn *ct;
707 	enum ip_conntrack_info ctinfo;
708 	unsigned int ret;
709 	int err;
710 
711 	ret = nf_nat_ipv4_fn(priv, skb, state);
712 	if (ret != NF_ACCEPT)
713 		return ret;
714 
715 	ct = nf_ct_get(skb, &ctinfo);
716 	if (ct) {
717 		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
718 
719 		if (ct->tuplehash[dir].tuple.dst.u3.ip !=
720 		    ct->tuplehash[!dir].tuple.src.u3.ip) {
721 			err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
722 			if (err < 0)
723 				ret = NF_DROP_ERR(err);
724 		}
725 #ifdef CONFIG_XFRM
726 		else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
727 			 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
728 			 ct->tuplehash[dir].tuple.dst.u.all !=
729 			 ct->tuplehash[!dir].tuple.src.u.all) {
730 			err = nf_xfrm_me_harder(state->net, skb, AF_INET);
731 			if (err < 0)
732 				ret = NF_DROP_ERR(err);
733 		}
734 #endif
735 	}
736 	return ret;
737 }
738 
/* IPv4 NAT hook table, passed together with its size to
 * nf_nat_register_fn().  One entry per hook point: PRE_ROUTING,
 * POST_ROUTING, LOCAL_OUT, LOCAL_IN.
 */
static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
	/* PRE_ROUTING: before packet filtering, change destination */
	{
		.hook		= nf_nat_ipv4_in,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_PRE_ROUTING,
		.priority	= NF_IP_PRI_NAT_DST,
	},
	/* POST_ROUTING: after packet filtering, change source */
	{
		.hook		= nf_nat_ipv4_out,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_POST_ROUTING,
		.priority	= NF_IP_PRI_NAT_SRC,
	},
	/* LOCAL_OUT: before packet filtering, change destination */
	{
		.hook		= nf_nat_ipv4_local_fn,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_LOCAL_OUT,
		.priority	= NF_IP_PRI_NAT_DST,
	},
	/* LOCAL_IN: after packet filtering, change source */
	{
		.hook		= nf_nat_ipv4_fn,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_LOCAL_IN,
		.priority	= NF_IP_PRI_NAT_SRC,
	},
};
769 
770 int nf_nat_l3proto_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops)
771 {
772 	return nf_nat_register_fn(net, ops, nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops));
773 }
774 EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv4_register_fn);
775 
776 void nf_nat_l3proto_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
777 {
778 	nf_nat_unregister_fn(net, ops, ARRAY_SIZE(nf_nat_ipv4_ops));
779 }
780 EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv4_unregister_fn);
781 
782 int nf_nat_l3proto_init(void)
783 {
784 	int ret = nf_nat_l3proto_register(&nf_nat_l3proto_ipv4);
785 
786 #if IS_ENABLED(CONFIG_IPV6)
787 	if (ret)
788 		return ret;
789 
790 	ret = nf_nat_l3proto_register(&nf_nat_l3proto_ipv6);
791 	if (ret == 0)
792 		return ret;
793 
794 	nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv4);
795 #endif
796 	return ret;
797 }
798 
/* Unregister the descriptors registered by nf_nat_l3proto_init(), in
 * reverse order: IPv6 first (when CONFIG_IPV6 is enabled), then IPv4.
 */
void nf_nat_l3proto_exit(void)
{
#if IS_ENABLED(CONFIG_IPV6)
	nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv6);
#endif
	nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv4);
}
806 
807 #if IS_ENABLED(CONFIG_IPV6)
/* IPv6 descriptor handed to nf_nat_l3proto_register()/unregister(); only
 * the l3proto number is filled in.
 */
static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = {
	.l3proto		= NFPROTO_IPV6,
};
811 
812 int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
813 				    struct nf_conn *ct,
814 				    enum ip_conntrack_info ctinfo,
815 				    unsigned int hooknum,
816 				    unsigned int hdrlen)
817 {
818 	struct {
819 		struct icmp6hdr	icmp6;
820 		struct ipv6hdr	ip6;
821 	} *inside;
822 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
823 	enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
824 	struct nf_conntrack_tuple target;
825 	unsigned long statusbit;
826 
827 	WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
828 
829 	if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
830 		return 0;
831 	if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6))
832 		return 0;
833 
834 	inside = (void *)skb->data + hdrlen;
835 	if (inside->icmp6.icmp6_type == NDISC_REDIRECT) {
836 		if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
837 			return 0;
838 		if (ct->status & IPS_NAT_MASK)
839 			return 0;
840 	}
841 
842 	if (manip == NF_NAT_MANIP_SRC)
843 		statusbit = IPS_SRC_NAT;
844 	else
845 		statusbit = IPS_DST_NAT;
846 
847 	/* Invert if this is reply direction */
848 	if (dir == IP_CT_DIR_REPLY)
849 		statusbit ^= IPS_NAT_MASK;
850 
851 	if (!(ct->status & statusbit))
852 		return 1;
853 
854 	if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6),
855 				   &ct->tuplehash[!dir].tuple, !manip))
856 		return 0;
857 
858 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
859 		struct ipv6hdr *ipv6h = ipv6_hdr(skb);
860 
861 		inside = (void *)skb->data + hdrlen;
862 		inside->icmp6.icmp6_cksum = 0;
863 		inside->icmp6.icmp6_cksum =
864 			csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
865 					skb->len - hdrlen, IPPROTO_ICMPV6,
866 					skb_checksum(skb, hdrlen,
867 						     skb->len - hdrlen, 0));
868 	}
869 
870 	nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
871 	target.dst.protonum = IPPROTO_ICMPV6;
872 	if (!nf_nat_ipv6_manip_pkt(skb, 0, &target, manip))
873 		return 0;
874 
875 	return 1;
876 }
877 EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation);
878 
879 static unsigned int
880 nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
881 	       const struct nf_hook_state *state)
882 {
883 	struct nf_conn *ct;
884 	enum ip_conntrack_info ctinfo;
885 	__be16 frag_off;
886 	int hdrlen;
887 	u8 nexthdr;
888 
889 	ct = nf_ct_get(skb, &ctinfo);
890 	/* Can't track?  It's not due to stress, or conntrack would
891 	 * have dropped it.  Hence it's the user's responsibilty to
892 	 * packet filter it out, or implement conntrack/NAT for that
893 	 * protocol. 8) --RR
894 	 */
895 	if (!ct)
896 		return NF_ACCEPT;
897 
898 	if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) {
899 		nexthdr = ipv6_hdr(skb)->nexthdr;
900 		hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
901 					  &nexthdr, &frag_off);
902 
903 		if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
904 			if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
905 							     state->hook,
906 							     hdrlen))
907 				return NF_DROP;
908 			else
909 				return NF_ACCEPT;
910 		}
911 	}
912 
913 	return nf_nat_inet_fn(priv, skb, state);
914 }
915 
916 static unsigned int
917 nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
918 	       const struct nf_hook_state *state)
919 {
920 	unsigned int ret;
921 	struct in6_addr daddr = ipv6_hdr(skb)->daddr;
922 
923 	ret = nf_nat_ipv6_fn(priv, skb, state);
924 	if (ret != NF_DROP && ret != NF_STOLEN &&
925 	    ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
926 		skb_dst_drop(skb);
927 
928 	return ret;
929 }
930 
/*
 * POST_ROUTING hook: run the common NAT body.
 *
 * With CONFIG_XFRM, an accepted packet that has not yet been xfrm
 * transformed is passed to nf_xfrm_me_harder() when the source address,
 * or for non-ICMPv6 the source port/id, differs between the packet's
 * direction tuple and the other direction's destination.  A failure
 * there turns the verdict into NF_DROP_ERR(err).
 */
static unsigned int
nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
		const struct nf_hook_state *state)
{
#ifdef CONFIG_XFRM
	const struct nf_conntrack_tuple *cur, *rev;
	const struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	enum ip_conntrack_dir dir;
	int err;
#endif
	unsigned int ret;

	ret = nf_nat_ipv6_fn(priv, skb, state);
#ifdef CONFIG_XFRM
	if (ret != NF_ACCEPT)
		return ret;

	if (IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED)
		return ret;

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct)
		return ret;

	dir = CTINFO2DIR(ctinfo);
	cur = &ct->tuplehash[dir].tuple;
	rev = &ct->tuplehash[!dir].tuple;

	if (!nf_inet_addr_cmp(&cur->src.u3, &rev->dst.u3) ||
	    (cur->dst.protonum != IPPROTO_ICMPV6 &&
	     cur->src.u.all != rev->dst.u.all)) {
		err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
		if (err < 0)
			ret = NF_DROP_ERR(err);
	}
#endif

	return ret;
}
967 
/*
 * Call the IPv6 route_me_harder implementation for @skb.
 *
 * With CONFIG_IPV6_MODULE the call goes through the ops table returned by
 * nf_get_ipv6_ops(); if no table is available -EHOSTUNREACH is returned.
 * Otherwise ip6_route_me_harder() is called directly.
 */
static int nat_route_me_harder(struct net *net, struct sk_buff *skb)
{
#ifdef CONFIG_IPV6_MODULE
	const struct nf_ipv6_ops *ops = nf_get_ipv6_ops();

	return ops ? ops->route_me_harder(net, skb) : -EHOSTUNREACH;
#else
	return ip6_route_me_harder(net, skb);
#endif
}
981 
982 static unsigned int
983 nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
984 		     const struct nf_hook_state *state)
985 {
986 	const struct nf_conn *ct;
987 	enum ip_conntrack_info ctinfo;
988 	unsigned int ret;
989 	int err;
990 
991 	ret = nf_nat_ipv6_fn(priv, skb, state);
992 	if (ret != NF_ACCEPT)
993 		return ret;
994 
995 	ct = nf_ct_get(skb, &ctinfo);
996 	if (ct) {
997 		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
998 
999 		if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
1000 				      &ct->tuplehash[!dir].tuple.src.u3)) {
1001 			err = nat_route_me_harder(state->net, skb);
1002 			if (err < 0)
1003 				ret = NF_DROP_ERR(err);
1004 		}
1005 #ifdef CONFIG_XFRM
1006 		else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
1007 			 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
1008 			 ct->tuplehash[dir].tuple.dst.u.all !=
1009 			 ct->tuplehash[!dir].tuple.src.u.all) {
1010 			err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
1011 			if (err < 0)
1012 				ret = NF_DROP_ERR(err);
1013 		}
1014 #endif
1015 	}
1016 
1017 	return ret;
1018 }
1019 
/* IPv6 NAT hook table, passed together with its size to
 * nf_nat_register_fn().  One entry per hook point: PRE_ROUTING,
 * POST_ROUTING, LOCAL_OUT, LOCAL_IN.
 */
static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
	/* PRE_ROUTING: before packet filtering, change destination */
	{
		.hook		= nf_nat_ipv6_in,
		.pf		= NFPROTO_IPV6,
		.hooknum	= NF_INET_PRE_ROUTING,
		.priority	= NF_IP6_PRI_NAT_DST,
	},
	/* POST_ROUTING: after packet filtering, change source */
	{
		.hook		= nf_nat_ipv6_out,
		.pf		= NFPROTO_IPV6,
		.hooknum	= NF_INET_POST_ROUTING,
		.priority	= NF_IP6_PRI_NAT_SRC,
	},
	/* LOCAL_OUT: before packet filtering, change destination */
	{
		.hook		= nf_nat_ipv6_local_fn,
		.pf		= NFPROTO_IPV6,
		.hooknum	= NF_INET_LOCAL_OUT,
		.priority	= NF_IP6_PRI_NAT_DST,
	},
	/* LOCAL_IN: after packet filtering, change source */
	{
		.hook		= nf_nat_ipv6_fn,
		.pf		= NFPROTO_IPV6,
		.hooknum	= NF_INET_LOCAL_IN,
		.priority	= NF_IP6_PRI_NAT_SRC,
	},
};
1050 
1051 int nf_nat_l3proto_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops)
1052 {
1053 	return nf_nat_register_fn(net, ops, nf_nat_ipv6_ops,
1054 				  ARRAY_SIZE(nf_nat_ipv6_ops));
1055 }
1056 EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv6_register_fn);
1057 
1058 void nf_nat_l3proto_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
1059 {
1060 	nf_nat_unregister_fn(net, ops, ARRAY_SIZE(nf_nat_ipv6_ops));
1061 }
1062 EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv6_unregister_fn);
1063 #endif /* CONFIG_IPV6 */
1064