xref: /openbmc/linux/net/netfilter/nf_nat_proto.c (revision 14cb1a6e)
1 /* (C) 1999-2001 Paul `Rusty' Russell
2  * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  */
8 
9 #include <linux/types.h>
10 #include <linux/export.h>
11 #include <linux/init.h>
12 #include <linux/udp.h>
13 #include <linux/tcp.h>
14 #include <linux/icmp.h>
15 #include <linux/icmpv6.h>
16 
17 #include <linux/dccp.h>
18 #include <linux/sctp.h>
19 #include <net/sctp/checksum.h>
20 
21 #include <linux/netfilter.h>
22 #include <net/netfilter/nf_nat.h>
23 #include <net/netfilter/nf_nat_core.h>
24 #include <net/netfilter/nf_nat_l3proto.h>
25 
26 #include <linux/ipv6.h>
27 #include <linux/netfilter_ipv6.h>
28 #include <net/checksum.h>
29 #include <net/ip6_checksum.h>
30 #include <net/ip6_route.h>
31 #include <net/xfrm.h>
32 #include <net/ipv6.h>
33 
34 #include <net/netfilter/nf_conntrack_core.h>
35 #include <net/netfilter/nf_conntrack.h>
36 #include <linux/netfilter/nfnetlink_conntrack.h>
37 
/* Forward declarations: the per-family descriptors are defined further down
 * in this file, but the manip_pkt helpers below need to take their address
 * before that point.
 */
static const struct nf_nat_l3proto nf_nat_l3proto_ipv4;
#if IS_ENABLED(CONFIG_IPV6)
static const struct nf_nat_l3proto nf_nat_l3proto_ipv6;
#endif
42 
43 static void
44 __udp_manip_pkt(struct sk_buff *skb,
45 	        const struct nf_nat_l3proto *l3proto,
46 	        unsigned int iphdroff, struct udphdr *hdr,
47 	        const struct nf_conntrack_tuple *tuple,
48 	        enum nf_nat_manip_type maniptype, bool do_csum)
49 {
50 	__be16 *portptr, newport;
51 
52 	if (maniptype == NF_NAT_MANIP_SRC) {
53 		/* Get rid of src port */
54 		newport = tuple->src.u.udp.port;
55 		portptr = &hdr->source;
56 	} else {
57 		/* Get rid of dst port */
58 		newport = tuple->dst.u.udp.port;
59 		portptr = &hdr->dest;
60 	}
61 	if (do_csum) {
62 		l3proto->csum_update(skb, iphdroff, &hdr->check,
63 				     tuple, maniptype);
64 		inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
65 					 false);
66 		if (!hdr->check)
67 			hdr->check = CSUM_MANGLED_0;
68 	}
69 	*portptr = newport;
70 }
71 
72 static bool udp_manip_pkt(struct sk_buff *skb,
73 			  const struct nf_nat_l3proto *l3proto,
74 			  unsigned int iphdroff, unsigned int hdroff,
75 			  const struct nf_conntrack_tuple *tuple,
76 			  enum nf_nat_manip_type maniptype)
77 {
78 	struct udphdr *hdr;
79 	bool do_csum;
80 
81 	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
82 		return false;
83 
84 	hdr = (struct udphdr *)(skb->data + hdroff);
85 	do_csum = hdr->check || skb->ip_summed == CHECKSUM_PARTIAL;
86 
87 	__udp_manip_pkt(skb, l3proto, iphdroff, hdr, tuple, maniptype, do_csum);
88 	return true;
89 }
90 
91 static bool udplite_manip_pkt(struct sk_buff *skb,
92 			      const struct nf_nat_l3proto *l3proto,
93 			      unsigned int iphdroff, unsigned int hdroff,
94 			      const struct nf_conntrack_tuple *tuple,
95 			      enum nf_nat_manip_type maniptype)
96 {
97 #ifdef CONFIG_NF_CT_PROTO_UDPLITE
98 	struct udphdr *hdr;
99 
100 	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
101 		return false;
102 
103 	hdr = (struct udphdr *)(skb->data + hdroff);
104 	__udp_manip_pkt(skb, l3proto, iphdroff, hdr, tuple, maniptype, true);
105 #endif
106 	return true;
107 }
108 
109 static bool
110 sctp_manip_pkt(struct sk_buff *skb,
111 	       const struct nf_nat_l3proto *l3proto,
112 	       unsigned int iphdroff, unsigned int hdroff,
113 	       const struct nf_conntrack_tuple *tuple,
114 	       enum nf_nat_manip_type maniptype)
115 {
116 #ifdef CONFIG_NF_CT_PROTO_SCTP
117 	struct sctphdr *hdr;
118 	int hdrsize = 8;
119 
120 	/* This could be an inner header returned in imcp packet; in such
121 	 * cases we cannot update the checksum field since it is outside
122 	 * of the 8 bytes of transport layer headers we are guaranteed.
123 	 */
124 	if (skb->len >= hdroff + sizeof(*hdr))
125 		hdrsize = sizeof(*hdr);
126 
127 	if (!skb_make_writable(skb, hdroff + hdrsize))
128 		return false;
129 
130 	hdr = (struct sctphdr *)(skb->data + hdroff);
131 
132 	if (maniptype == NF_NAT_MANIP_SRC) {
133 		/* Get rid of src port */
134 		hdr->source = tuple->src.u.sctp.port;
135 	} else {
136 		/* Get rid of dst port */
137 		hdr->dest = tuple->dst.u.sctp.port;
138 	}
139 
140 	if (hdrsize < sizeof(*hdr))
141 		return true;
142 
143 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
144 		hdr->checksum = sctp_compute_cksum(skb, hdroff);
145 		skb->ip_summed = CHECKSUM_NONE;
146 	}
147 
148 #endif
149 	return true;
150 }
151 
152 static bool
153 tcp_manip_pkt(struct sk_buff *skb,
154 	      const struct nf_nat_l3proto *l3proto,
155 	      unsigned int iphdroff, unsigned int hdroff,
156 	      const struct nf_conntrack_tuple *tuple,
157 	      enum nf_nat_manip_type maniptype)
158 {
159 	struct tcphdr *hdr;
160 	__be16 *portptr, newport, oldport;
161 	int hdrsize = 8; /* TCP connection tracking guarantees this much */
162 
163 	/* this could be a inner header returned in icmp packet; in such
164 	   cases we cannot update the checksum field since it is outside of
165 	   the 8 bytes of transport layer headers we are guaranteed */
166 	if (skb->len >= hdroff + sizeof(struct tcphdr))
167 		hdrsize = sizeof(struct tcphdr);
168 
169 	if (!skb_make_writable(skb, hdroff + hdrsize))
170 		return false;
171 
172 	hdr = (struct tcphdr *)(skb->data + hdroff);
173 
174 	if (maniptype == NF_NAT_MANIP_SRC) {
175 		/* Get rid of src port */
176 		newport = tuple->src.u.tcp.port;
177 		portptr = &hdr->source;
178 	} else {
179 		/* Get rid of dst port */
180 		newport = tuple->dst.u.tcp.port;
181 		portptr = &hdr->dest;
182 	}
183 
184 	oldport = *portptr;
185 	*portptr = newport;
186 
187 	if (hdrsize < sizeof(*hdr))
188 		return true;
189 
190 	l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
191 	inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, false);
192 	return true;
193 }
194 
195 static bool
196 dccp_manip_pkt(struct sk_buff *skb,
197 	       const struct nf_nat_l3proto *l3proto,
198 	       unsigned int iphdroff, unsigned int hdroff,
199 	       const struct nf_conntrack_tuple *tuple,
200 	       enum nf_nat_manip_type maniptype)
201 {
202 #ifdef CONFIG_NF_CT_PROTO_DCCP
203 	struct dccp_hdr *hdr;
204 	__be16 *portptr, oldport, newport;
205 	int hdrsize = 8; /* DCCP connection tracking guarantees this much */
206 
207 	if (skb->len >= hdroff + sizeof(struct dccp_hdr))
208 		hdrsize = sizeof(struct dccp_hdr);
209 
210 	if (!skb_make_writable(skb, hdroff + hdrsize))
211 		return false;
212 
213 	hdr = (struct dccp_hdr *)(skb->data + hdroff);
214 
215 	if (maniptype == NF_NAT_MANIP_SRC) {
216 		newport = tuple->src.u.dccp.port;
217 		portptr = &hdr->dccph_sport;
218 	} else {
219 		newport = tuple->dst.u.dccp.port;
220 		portptr = &hdr->dccph_dport;
221 	}
222 
223 	oldport = *portptr;
224 	*portptr = newport;
225 
226 	if (hdrsize < sizeof(*hdr))
227 		return true;
228 
229 	l3proto->csum_update(skb, iphdroff, &hdr->dccph_checksum,
230 			     tuple, maniptype);
231 	inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport,
232 				 false);
233 #endif
234 	return true;
235 }
236 
237 static bool
238 icmp_manip_pkt(struct sk_buff *skb,
239 	       const struct nf_nat_l3proto *l3proto,
240 	       unsigned int iphdroff, unsigned int hdroff,
241 	       const struct nf_conntrack_tuple *tuple,
242 	       enum nf_nat_manip_type maniptype)
243 {
244 	struct icmphdr *hdr;
245 
246 	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
247 		return false;
248 
249 	hdr = (struct icmphdr *)(skb->data + hdroff);
250 	inet_proto_csum_replace2(&hdr->checksum, skb,
251 				 hdr->un.echo.id, tuple->src.u.icmp.id, false);
252 	hdr->un.echo.id = tuple->src.u.icmp.id;
253 	return true;
254 }
255 
256 static bool
257 icmpv6_manip_pkt(struct sk_buff *skb,
258 		 const struct nf_nat_l3proto *l3proto,
259 		 unsigned int iphdroff, unsigned int hdroff,
260 		 const struct nf_conntrack_tuple *tuple,
261 		 enum nf_nat_manip_type maniptype)
262 {
263 	struct icmp6hdr *hdr;
264 
265 	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
266 		return false;
267 
268 	hdr = (struct icmp6hdr *)(skb->data + hdroff);
269 	l3proto->csum_update(skb, iphdroff, &hdr->icmp6_cksum,
270 			     tuple, maniptype);
271 	if (hdr->icmp6_type == ICMPV6_ECHO_REQUEST ||
272 	    hdr->icmp6_type == ICMPV6_ECHO_REPLY) {
273 		inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
274 					 hdr->icmp6_identifier,
275 					 tuple->src.u.icmp.id, false);
276 		hdr->icmp6_identifier = tuple->src.u.icmp.id;
277 	}
278 	return true;
279 }
280 
281 /* manipulate a GRE packet according to maniptype */
282 static bool
283 gre_manip_pkt(struct sk_buff *skb,
284 	      const struct nf_nat_l3proto *l3proto,
285 	      unsigned int iphdroff, unsigned int hdroff,
286 	      const struct nf_conntrack_tuple *tuple,
287 	      enum nf_nat_manip_type maniptype)
288 {
289 #if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE)
290 	const struct gre_base_hdr *greh;
291 	struct pptp_gre_header *pgreh;
292 
293 	/* pgreh includes two optional 32bit fields which are not required
294 	 * to be there.  That's where the magic '8' comes from */
295 	if (!skb_make_writable(skb, hdroff + sizeof(*pgreh) - 8))
296 		return false;
297 
298 	greh = (void *)skb->data + hdroff;
299 	pgreh = (struct pptp_gre_header *)greh;
300 
301 	/* we only have destination manip of a packet, since 'source key'
302 	 * is not present in the packet itself */
303 	if (maniptype != NF_NAT_MANIP_DST)
304 		return true;
305 
306 	switch (greh->flags & GRE_VERSION) {
307 	case GRE_VERSION_0:
308 		/* We do not currently NAT any GREv0 packets.
309 		 * Try to behave like "nf_nat_proto_unknown" */
310 		break;
311 	case GRE_VERSION_1:
312 		pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key));
313 		pgreh->call_id = tuple->dst.u.gre.key;
314 		break;
315 	default:
316 		pr_debug("can't nat unknown GRE version\n");
317 		return false;
318 	}
319 #endif
320 	return true;
321 }
322 
323 static bool l4proto_manip_pkt(struct sk_buff *skb,
324 			      const struct nf_nat_l3proto *l3proto,
325 			      unsigned int iphdroff, unsigned int hdroff,
326 			      const struct nf_conntrack_tuple *tuple,
327 			      enum nf_nat_manip_type maniptype)
328 {
329 	switch (tuple->dst.protonum) {
330 	case IPPROTO_TCP:
331 		return tcp_manip_pkt(skb, l3proto, iphdroff, hdroff,
332 				     tuple, maniptype);
333 	case IPPROTO_UDP:
334 		return udp_manip_pkt(skb, l3proto, iphdroff, hdroff,
335 				     tuple, maniptype);
336 	case IPPROTO_UDPLITE:
337 		return udplite_manip_pkt(skb, l3proto, iphdroff, hdroff,
338 					 tuple, maniptype);
339 	case IPPROTO_SCTP:
340 		return sctp_manip_pkt(skb, l3proto, iphdroff, hdroff,
341 				      tuple, maniptype);
342 	case IPPROTO_ICMP:
343 		return icmp_manip_pkt(skb, l3proto, iphdroff, hdroff,
344 				      tuple, maniptype);
345 	case IPPROTO_ICMPV6:
346 		return icmpv6_manip_pkt(skb, l3proto, iphdroff, hdroff,
347 					tuple, maniptype);
348 	case IPPROTO_DCCP:
349 		return dccp_manip_pkt(skb, l3proto, iphdroff, hdroff,
350 				      tuple, maniptype);
351 	case IPPROTO_GRE:
352 		return gre_manip_pkt(skb, l3proto, iphdroff, hdroff,
353 				     tuple, maniptype);
354 	}
355 
356 	/* If we don't know protocol -- no error, pass it unmodified. */
357 	return true;
358 }
359 
360 static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
361 				  unsigned int iphdroff,
362 				  const struct nf_conntrack_tuple *target,
363 				  enum nf_nat_manip_type maniptype)
364 {
365 	struct iphdr *iph;
366 	unsigned int hdroff;
367 
368 	if (!skb_make_writable(skb, iphdroff + sizeof(*iph)))
369 		return false;
370 
371 	iph = (void *)skb->data + iphdroff;
372 	hdroff = iphdroff + iph->ihl * 4;
373 
374 	if (!l4proto_manip_pkt(skb, &nf_nat_l3proto_ipv4, iphdroff,
375 			       hdroff, target, maniptype))
376 		return false;
377 	iph = (void *)skb->data + iphdroff;
378 
379 	if (maniptype == NF_NAT_MANIP_SRC) {
380 		csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
381 		iph->saddr = target->src.u3.ip;
382 	} else {
383 		csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
384 		iph->daddr = target->dst.u3.ip;
385 	}
386 	return true;
387 }
388 
389 static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
390 				  unsigned int iphdroff,
391 				  const struct nf_conntrack_tuple *target,
392 				  enum nf_nat_manip_type maniptype)
393 {
394 #if IS_ENABLED(CONFIG_IPV6)
395 	struct ipv6hdr *ipv6h;
396 	__be16 frag_off;
397 	int hdroff;
398 	u8 nexthdr;
399 
400 	if (!skb_make_writable(skb, iphdroff + sizeof(*ipv6h)))
401 		return false;
402 
403 	ipv6h = (void *)skb->data + iphdroff;
404 	nexthdr = ipv6h->nexthdr;
405 	hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h),
406 				  &nexthdr, &frag_off);
407 	if (hdroff < 0)
408 		goto manip_addr;
409 
410 	if ((frag_off & htons(~0x7)) == 0 &&
411 	    !l4proto_manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff,
412 			       target, maniptype))
413 		return false;
414 
415 	/* must reload, offset might have changed */
416 	ipv6h = (void *)skb->data + iphdroff;
417 
418 manip_addr:
419 	if (maniptype == NF_NAT_MANIP_SRC)
420 		ipv6h->saddr = target->src.u3.in6;
421 	else
422 		ipv6h->daddr = target->dst.u3.in6;
423 
424 #endif
425 	return true;
426 }
427 
428 static void nf_nat_ipv4_csum_update(struct sk_buff *skb,
429 				    unsigned int iphdroff, __sum16 *check,
430 				    const struct nf_conntrack_tuple *t,
431 				    enum nf_nat_manip_type maniptype)
432 {
433 	struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
434 	__be32 oldip, newip;
435 
436 	if (maniptype == NF_NAT_MANIP_SRC) {
437 		oldip = iph->saddr;
438 		newip = t->src.u3.ip;
439 	} else {
440 		oldip = iph->daddr;
441 		newip = t->dst.u3.ip;
442 	}
443 	inet_proto_csum_replace4(check, skb, oldip, newip, true);
444 }
445 
446 static void nf_nat_ipv6_csum_update(struct sk_buff *skb,
447 				    unsigned int iphdroff, __sum16 *check,
448 				    const struct nf_conntrack_tuple *t,
449 				    enum nf_nat_manip_type maniptype)
450 {
451 #if IS_ENABLED(CONFIG_IPV6)
452 	const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + iphdroff);
453 	const struct in6_addr *oldip, *newip;
454 
455 	if (maniptype == NF_NAT_MANIP_SRC) {
456 		oldip = &ipv6h->saddr;
457 		newip = &t->src.u3.in6;
458 	} else {
459 		oldip = &ipv6h->daddr;
460 		newip = &t->dst.u3.in6;
461 	}
462 	inet_proto_csum_replace16(check, skb, oldip->s6_addr32,
463 				  newip->s6_addr32, true);
464 #endif
465 }
466 
467 static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
468 				    u8 proto, void *data, __sum16 *check,
469 				    int datalen, int oldlen)
470 {
471 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
472 		const struct iphdr *iph = ip_hdr(skb);
473 
474 		skb->ip_summed = CHECKSUM_PARTIAL;
475 		skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
476 			ip_hdrlen(skb);
477 		skb->csum_offset = (void *)check - data;
478 		*check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, datalen,
479 					    proto, 0);
480 	} else {
481 		inet_proto_csum_replace2(check, skb,
482 					 htons(oldlen), htons(datalen), true);
483 	}
484 }
485 
486 #if IS_ENABLED(CONFIG_IPV6)
487 static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
488 				    u8 proto, void *data, __sum16 *check,
489 				    int datalen, int oldlen)
490 {
491 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
492 		const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
493 
494 		skb->ip_summed = CHECKSUM_PARTIAL;
495 		skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
496 			(data - (void *)skb->data);
497 		skb->csum_offset = (void *)check - data;
498 		*check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
499 					  datalen, proto, 0);
500 	} else {
501 		inet_proto_csum_replace2(check, skb,
502 					 htons(oldlen), htons(datalen), true);
503 	}
504 }
505 #endif
506 
/* IPv4 layer 3 NAT descriptor: bundles the IPv4 packet mangling and
 * checksum helpers defined above. Registered by nf_nat_l3proto_init().
 */
static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = {
	.l3proto		= NFPROTO_IPV4,
	.manip_pkt		= nf_nat_ipv4_manip_pkt,
	.csum_update		= nf_nat_ipv4_csum_update,
	.csum_recalc		= nf_nat_ipv4_csum_recalc,
};
513 
514 int nf_nat_icmp_reply_translation(struct sk_buff *skb,
515 				  struct nf_conn *ct,
516 				  enum ip_conntrack_info ctinfo,
517 				  unsigned int hooknum)
518 {
519 	struct {
520 		struct icmphdr	icmp;
521 		struct iphdr	ip;
522 	} *inside;
523 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
524 	enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
525 	unsigned int hdrlen = ip_hdrlen(skb);
526 	struct nf_conntrack_tuple target;
527 	unsigned long statusbit;
528 
529 	WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
530 
531 	if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
532 		return 0;
533 	if (nf_ip_checksum(skb, hooknum, hdrlen, 0))
534 		return 0;
535 
536 	inside = (void *)skb->data + hdrlen;
537 	if (inside->icmp.type == ICMP_REDIRECT) {
538 		if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
539 			return 0;
540 		if (ct->status & IPS_NAT_MASK)
541 			return 0;
542 	}
543 
544 	if (manip == NF_NAT_MANIP_SRC)
545 		statusbit = IPS_SRC_NAT;
546 	else
547 		statusbit = IPS_DST_NAT;
548 
549 	/* Invert if this is reply direction */
550 	if (dir == IP_CT_DIR_REPLY)
551 		statusbit ^= IPS_NAT_MASK;
552 
553 	if (!(ct->status & statusbit))
554 		return 1;
555 
556 	if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp),
557 				   &ct->tuplehash[!dir].tuple, !manip))
558 		return 0;
559 
560 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
561 		/* Reloading "inside" here since manip_pkt may reallocate */
562 		inside = (void *)skb->data + hdrlen;
563 		inside->icmp.checksum = 0;
564 		inside->icmp.checksum =
565 			csum_fold(skb_checksum(skb, hdrlen,
566 					       skb->len - hdrlen, 0));
567 	}
568 
569 	/* Change outer to look like the reply to an incoming packet */
570 	nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
571 	target.dst.protonum = IPPROTO_ICMP;
572 	if (!nf_nat_ipv4_manip_pkt(skb, 0, &target, manip))
573 		return 0;
574 
575 	return 1;
576 }
577 EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
578 
579 static unsigned int
580 nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
581 	       const struct nf_hook_state *state)
582 {
583 	struct nf_conn *ct;
584 	enum ip_conntrack_info ctinfo;
585 
586 	ct = nf_ct_get(skb, &ctinfo);
587 	if (!ct)
588 		return NF_ACCEPT;
589 
590 	if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) {
591 		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
592 			if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
593 							   state->hook))
594 				return NF_DROP;
595 			else
596 				return NF_ACCEPT;
597 		}
598 	}
599 
600 	return nf_nat_inet_fn(priv, skb, state);
601 }
602 
603 static unsigned int
604 nf_nat_ipv4_in(void *priv, struct sk_buff *skb,
605 	       const struct nf_hook_state *state)
606 {
607 	unsigned int ret;
608 	__be32 daddr = ip_hdr(skb)->daddr;
609 
610 	ret = nf_nat_ipv4_fn(priv, skb, state);
611 	if (ret == NF_ACCEPT && daddr != ip_hdr(skb)->daddr)
612 		skb_dst_drop(skb);
613 
614 	return ret;
615 }
616 
/* POST_ROUTING hook: run NAT and, with CONFIG_XFRM, call
 * nf_xfrm_me_harder() when the source address or (for non-ICMP) the source
 * port/id of this direction differs from the destination side of the
 * opposite direction tuple.
 */
static unsigned int
nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
		const struct nf_hook_state *state)
{
#ifdef CONFIG_XFRM
	const struct nf_conntrack_tuple *here, *there;
	const struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	enum ip_conntrack_dir dir;
	int err;
#endif
	unsigned int ret;

	ret = nf_nat_ipv4_fn(priv, skb, state);
#ifdef CONFIG_XFRM
	/* Nothing more to do for rejected or already transformed packets. */
	if (ret != NF_ACCEPT ||
	    (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED))
		return ret;

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct)
		return ret;

	dir = CTINFO2DIR(ctinfo);
	here = &ct->tuplehash[dir].tuple;
	there = &ct->tuplehash[!dir].tuple;

	/* Source side unchanged by NAT: no new policy lookup needed. */
	if (here->src.u3.ip == there->dst.u3.ip &&
	    (here->dst.protonum == IPPROTO_ICMP ||
	     here->src.u.all == there->dst.u.all))
		return ret;

	err = nf_xfrm_me_harder(state->net, skb, AF_INET);
	if (err < 0)
		ret = NF_DROP_ERR(err);
#endif
	return ret;
}
653 
654 static unsigned int
655 nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
656 		     const struct nf_hook_state *state)
657 {
658 	const struct nf_conn *ct;
659 	enum ip_conntrack_info ctinfo;
660 	unsigned int ret;
661 	int err;
662 
663 	ret = nf_nat_ipv4_fn(priv, skb, state);
664 	if (ret != NF_ACCEPT)
665 		return ret;
666 
667 	ct = nf_ct_get(skb, &ctinfo);
668 	if (ct) {
669 		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
670 
671 		if (ct->tuplehash[dir].tuple.dst.u3.ip !=
672 		    ct->tuplehash[!dir].tuple.src.u3.ip) {
673 			err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
674 			if (err < 0)
675 				ret = NF_DROP_ERR(err);
676 		}
677 #ifdef CONFIG_XFRM
678 		else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
679 			 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
680 			 ct->tuplehash[dir].tuple.dst.u.all !=
681 			 ct->tuplehash[!dir].tuple.src.u.all) {
682 			err = nf_xfrm_me_harder(state->net, skb, AF_INET);
683 			if (err < 0)
684 				ret = NF_DROP_ERR(err);
685 		}
686 #endif
687 	}
688 	return ret;
689 }
690 
/* IPv4 NAT hook table: one entry per netfilter hook point NAT attaches to.
 * Used as the template passed to nf_nat_register_fn() and for its size in
 * nf_nat_unregister_fn().
 */
static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
	/* Before packet filtering, change destination */
	{
		.hook		= nf_nat_ipv4_in,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_PRE_ROUTING,
		.priority	= NF_IP_PRI_NAT_DST,
	},
	/* After packet filtering, change source */
	{
		.hook		= nf_nat_ipv4_out,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_POST_ROUTING,
		.priority	= NF_IP_PRI_NAT_SRC,
	},
	/* Before packet filtering, change destination */
	{
		.hook		= nf_nat_ipv4_local_fn,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_LOCAL_OUT,
		.priority	= NF_IP_PRI_NAT_DST,
	},
	/* After packet filtering, change source */
	{
		.hook		= nf_nat_ipv4_fn,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_LOCAL_IN,
		.priority	= NF_IP_PRI_NAT_SRC,
	},
};
721 
722 int nf_nat_l3proto_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops)
723 {
724 	return nf_nat_register_fn(net, ops, nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops));
725 }
726 EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv4_register_fn);
727 
728 void nf_nat_l3proto_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
729 {
730 	nf_nat_unregister_fn(net, ops, ARRAY_SIZE(nf_nat_ipv4_ops));
731 }
732 EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv4_unregister_fn);
733 
734 int nf_nat_l3proto_init(void)
735 {
736 	int ret = nf_nat_l3proto_register(&nf_nat_l3proto_ipv4);
737 
738 #if IS_ENABLED(CONFIG_IPV6)
739 	if (ret)
740 		return ret;
741 
742 	ret = nf_nat_l3proto_register(&nf_nat_l3proto_ipv6);
743 	if (ret == 0)
744 		return ret;
745 
746 	nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv4);
747 #endif
748 	return ret;
749 }
750 
/* Unregister the layer 3 NAT descriptors in the reverse order of
 * nf_nat_l3proto_init(): IPv6 first (if built in), then IPv4.
 */
void nf_nat_l3proto_exit(void)
{
#if IS_ENABLED(CONFIG_IPV6)
	nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv6);
#endif
	nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv4);
}
758 
759 #if IS_ENABLED(CONFIG_IPV6)
/* IPv6 layer 3 NAT descriptor: bundles the IPv6 packet mangling and
 * checksum helpers defined above. Registered by nf_nat_l3proto_init().
 */
static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = {
	.l3proto		= NFPROTO_IPV6,
	.manip_pkt		= nf_nat_ipv6_manip_pkt,
	.csum_update		= nf_nat_ipv6_csum_update,
	.csum_recalc		= nf_nat_ipv6_csum_recalc,
};
766 
767 int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
768 				    struct nf_conn *ct,
769 				    enum ip_conntrack_info ctinfo,
770 				    unsigned int hooknum,
771 				    unsigned int hdrlen)
772 {
773 	struct {
774 		struct icmp6hdr	icmp6;
775 		struct ipv6hdr	ip6;
776 	} *inside;
777 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
778 	enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
779 	struct nf_conntrack_tuple target;
780 	unsigned long statusbit;
781 
782 	WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
783 
784 	if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
785 		return 0;
786 	if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6))
787 		return 0;
788 
789 	inside = (void *)skb->data + hdrlen;
790 	if (inside->icmp6.icmp6_type == NDISC_REDIRECT) {
791 		if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
792 			return 0;
793 		if (ct->status & IPS_NAT_MASK)
794 			return 0;
795 	}
796 
797 	if (manip == NF_NAT_MANIP_SRC)
798 		statusbit = IPS_SRC_NAT;
799 	else
800 		statusbit = IPS_DST_NAT;
801 
802 	/* Invert if this is reply direction */
803 	if (dir == IP_CT_DIR_REPLY)
804 		statusbit ^= IPS_NAT_MASK;
805 
806 	if (!(ct->status & statusbit))
807 		return 1;
808 
809 	if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6),
810 				   &ct->tuplehash[!dir].tuple, !manip))
811 		return 0;
812 
813 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
814 		struct ipv6hdr *ipv6h = ipv6_hdr(skb);
815 
816 		inside = (void *)skb->data + hdrlen;
817 		inside->icmp6.icmp6_cksum = 0;
818 		inside->icmp6.icmp6_cksum =
819 			csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
820 					skb->len - hdrlen, IPPROTO_ICMPV6,
821 					skb_checksum(skb, hdrlen,
822 						     skb->len - hdrlen, 0));
823 	}
824 
825 	nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
826 	target.dst.protonum = IPPROTO_ICMPV6;
827 	if (!nf_nat_ipv6_manip_pkt(skb, 0, &target, manip))
828 		return 0;
829 
830 	return 1;
831 }
832 EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation);
833 
834 static unsigned int
835 nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
836 	       const struct nf_hook_state *state)
837 {
838 	struct nf_conn *ct;
839 	enum ip_conntrack_info ctinfo;
840 	__be16 frag_off;
841 	int hdrlen;
842 	u8 nexthdr;
843 
844 	ct = nf_ct_get(skb, &ctinfo);
845 	/* Can't track?  It's not due to stress, or conntrack would
846 	 * have dropped it.  Hence it's the user's responsibilty to
847 	 * packet filter it out, or implement conntrack/NAT for that
848 	 * protocol. 8) --RR
849 	 */
850 	if (!ct)
851 		return NF_ACCEPT;
852 
853 	if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) {
854 		nexthdr = ipv6_hdr(skb)->nexthdr;
855 		hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
856 					  &nexthdr, &frag_off);
857 
858 		if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
859 			if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
860 							     state->hook,
861 							     hdrlen))
862 				return NF_DROP;
863 			else
864 				return NF_ACCEPT;
865 		}
866 	}
867 
868 	return nf_nat_inet_fn(priv, skb, state);
869 }
870 
871 static unsigned int
872 nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
873 	       const struct nf_hook_state *state)
874 {
875 	unsigned int ret;
876 	struct in6_addr daddr = ipv6_hdr(skb)->daddr;
877 
878 	ret = nf_nat_ipv6_fn(priv, skb, state);
879 	if (ret != NF_DROP && ret != NF_STOLEN &&
880 	    ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
881 		skb_dst_drop(skb);
882 
883 	return ret;
884 }
885 
/* POST_ROUTING hook: run NAT and, with CONFIG_XFRM, call
 * nf_xfrm_me_harder() when the source address or (for non-ICMPv6) the
 * source port/id of this direction differs from the destination side of
 * the opposite direction tuple.
 */
static unsigned int
nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
		const struct nf_hook_state *state)
{
#ifdef CONFIG_XFRM
	const struct nf_conntrack_tuple *here, *there;
	const struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	enum ip_conntrack_dir dir;
	int err;
#endif
	unsigned int ret;

	ret = nf_nat_ipv6_fn(priv, skb, state);
#ifdef CONFIG_XFRM
	/* Nothing more to do for rejected or already transformed packets. */
	if (ret != NF_ACCEPT ||
	    (IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED))
		return ret;

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct)
		return ret;

	dir = CTINFO2DIR(ctinfo);
	here = &ct->tuplehash[dir].tuple;
	there = &ct->tuplehash[!dir].tuple;

	/* Source side unchanged by NAT: no new policy lookup needed. */
	if (nf_inet_addr_cmp(&here->src.u3, &there->dst.u3) &&
	    (here->dst.protonum == IPPROTO_ICMPV6 ||
	     here->src.u.all == there->dst.u.all))
		return ret;

	err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
	if (err < 0)
		ret = NF_DROP_ERR(err);
#endif

	return ret;
}
922 
/* Reroute an IPv6 packet.
 *
 * When IPv6 is built as a module, the routing function is reached through
 * the ops table returned by nf_get_ipv6_ops(); -EHOSTUNREACH is returned if
 * that table is not available. Otherwise ip6_route_me_harder() is called
 * directly.
 */
static int nat_route_me_harder(struct net *net, struct sk_buff *skb)
{
#ifdef CONFIG_IPV6_MODULE
	const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();

	return v6_ops ? v6_ops->route_me_harder(net, skb) : -EHOSTUNREACH;
#else
	return ip6_route_me_harder(net, skb);
#endif
}
936 
937 static unsigned int
938 nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
939 		     const struct nf_hook_state *state)
940 {
941 	const struct nf_conn *ct;
942 	enum ip_conntrack_info ctinfo;
943 	unsigned int ret;
944 	int err;
945 
946 	ret = nf_nat_ipv6_fn(priv, skb, state);
947 	if (ret != NF_ACCEPT)
948 		return ret;
949 
950 	ct = nf_ct_get(skb, &ctinfo);
951 	if (ct) {
952 		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
953 
954 		if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
955 				      &ct->tuplehash[!dir].tuple.src.u3)) {
956 			err = nat_route_me_harder(state->net, skb);
957 			if (err < 0)
958 				ret = NF_DROP_ERR(err);
959 		}
960 #ifdef CONFIG_XFRM
961 		else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
962 			 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
963 			 ct->tuplehash[dir].tuple.dst.u.all !=
964 			 ct->tuplehash[!dir].tuple.src.u.all) {
965 			err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
966 			if (err < 0)
967 				ret = NF_DROP_ERR(err);
968 		}
969 #endif
970 	}
971 
972 	return ret;
973 }
974 
/* IPv6 NAT hook table: one entry per netfilter hook point NAT attaches to.
 * Used as the template passed to nf_nat_register_fn() and for its size in
 * nf_nat_unregister_fn().
 */
static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
	/* Before packet filtering, change destination */
	{
		.hook		= nf_nat_ipv6_in,
		.pf		= NFPROTO_IPV6,
		.hooknum	= NF_INET_PRE_ROUTING,
		.priority	= NF_IP6_PRI_NAT_DST,
	},
	/* After packet filtering, change source */
	{
		.hook		= nf_nat_ipv6_out,
		.pf		= NFPROTO_IPV6,
		.hooknum	= NF_INET_POST_ROUTING,
		.priority	= NF_IP6_PRI_NAT_SRC,
	},
	/* Before packet filtering, change destination */
	{
		.hook		= nf_nat_ipv6_local_fn,
		.pf		= NFPROTO_IPV6,
		.hooknum	= NF_INET_LOCAL_OUT,
		.priority	= NF_IP6_PRI_NAT_DST,
	},
	/* After packet filtering, change source */
	{
		.hook		= nf_nat_ipv6_fn,
		.pf		= NFPROTO_IPV6,
		.hooknum	= NF_INET_LOCAL_IN,
		.priority	= NF_IP6_PRI_NAT_SRC,
	},
};
1005 
1006 int nf_nat_l3proto_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops)
1007 {
1008 	return nf_nat_register_fn(net, ops, nf_nat_ipv6_ops,
1009 				  ARRAY_SIZE(nf_nat_ipv6_ops));
1010 }
1011 EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv6_register_fn);
1012 
1013 void nf_nat_l3proto_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
1014 {
1015 	nf_nat_unregister_fn(net, ops, ARRAY_SIZE(nf_nat_ipv6_ops));
1016 }
1017 EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv6_unregister_fn);
1018 #endif /* CONFIG_IPV6 */
1019