xref: /openbmc/linux/net/ipv6/netfilter.c (revision 22a41e9a5044bf3519f05b4a00e99af34bfeb40c)
1 /*
2  * IPv6 specific functions of netfilter core
3  *
4  * Rusty Russell (C) 2000 -- This code is GPL.
5  * Patrick McHardy (C) 2006-2012
6  */
7 #include <linux/kernel.h>
8 #include <linux/init.h>
9 #include <linux/ipv6.h>
10 #include <linux/netfilter.h>
11 #include <linux/netfilter_ipv6.h>
12 #include <linux/export.h>
13 #include <net/addrconf.h>
14 #include <net/dst.h>
15 #include <net/ipv6.h>
16 #include <net/ip6_route.h>
17 #include <net/xfrm.h>
18 #include <net/netfilter/nf_queue.h>
19 #include <net/netfilter/nf_conntrack_bridge.h>
20 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
21 #include "../bridge/br_private.h"
22 
23 int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff *skb)
24 {
25 	const struct ipv6hdr *iph = ipv6_hdr(skb);
26 	struct sock *sk = sk_to_full_sk(sk_partial);
27 	struct flow_keys flkeys;
28 	unsigned int hh_len;
29 	struct dst_entry *dst;
30 	int strict = (ipv6_addr_type(&iph->daddr) &
31 		      (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
32 	struct flowi6 fl6 = {
33 		.flowi6_oif = sk && sk->sk_bound_dev_if ? sk->sk_bound_dev_if :
34 			strict ? skb_dst(skb)->dev->ifindex : 0,
35 		.flowi6_mark = skb->mark,
36 		.flowi6_uid = sock_net_uid(net, sk),
37 		.daddr = iph->daddr,
38 		.saddr = iph->saddr,
39 	};
40 	int err;
41 
42 	fib6_rules_early_flow_dissect(net, skb, &fl6, &flkeys);
43 	dst = ip6_route_output(net, sk, &fl6);
44 	err = dst->error;
45 	if (err) {
46 		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
47 		net_dbg_ratelimited("ip6_route_me_harder: No more route\n");
48 		dst_release(dst);
49 		return err;
50 	}
51 
52 	/* Drop old route. */
53 	skb_dst_drop(skb);
54 
55 	skb_dst_set(skb, dst);
56 
57 #ifdef CONFIG_XFRM
58 	if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
59 	    xfrm_decode_session(skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) {
60 		skb_dst_set(skb, NULL);
61 		dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
62 		if (IS_ERR(dst))
63 			return PTR_ERR(dst);
64 		skb_dst_set(skb, dst);
65 	}
66 #endif
67 
68 	/* Change in oif may mean change in hh_len. */
69 	hh_len = skb_dst(skb)->dev->hard_header_len;
70 	if (skb_headroom(skb) < hh_len &&
71 	    pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)),
72 			     0, GFP_ATOMIC))
73 		return -ENOMEM;
74 
75 	return 0;
76 }
77 EXPORT_SYMBOL(ip6_route_me_harder);
78 
79 static int nf_ip6_reroute(struct sk_buff *skb,
80 			  const struct nf_queue_entry *entry)
81 {
82 	struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
83 
84 	if (entry->state.hook == NF_INET_LOCAL_OUT) {
85 		const struct ipv6hdr *iph = ipv6_hdr(skb);
86 		if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
87 		    !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) ||
88 		    skb->mark != rt_info->mark)
89 			return ip6_route_me_harder(entry->state.net, entry->state.sk, skb);
90 	}
91 	return 0;
92 }
93 
94 int __nf_ip6_route(struct net *net, struct dst_entry **dst,
95 		   struct flowi *fl, bool strict)
96 {
97 	static const struct ipv6_pinfo fake_pinfo;
98 	static const struct inet_sock fake_sk = {
99 		/* makes ip6_route_output set RT6_LOOKUP_F_IFACE: */
100 		.sk.sk_bound_dev_if = 1,
101 		.pinet6 = (struct ipv6_pinfo *) &fake_pinfo,
102 	};
103 	const void *sk = strict ? &fake_sk : NULL;
104 	struct dst_entry *result;
105 	int err;
106 
107 	result = ip6_route_output(net, sk, &fl->u.ip6);
108 	err = result->error;
109 	if (err)
110 		dst_release(result);
111 	else
112 		*dst = result;
113 	return err;
114 }
115 EXPORT_SYMBOL_GPL(__nf_ip6_route);
116 
/*
 * br_ip6_fragment - fragment an IPv6 packet and hand each piece to @output.
 * @net:    network namespace (used when selecting the fragment identifier
 *          and passed through to @output)
 * @sk:     socket passed through to @output
 * @skb:    packet to fragment
 * @data:   opaque fragmentation data passed through to @output
 * @output: callback invoked once per fragment
 *
 * The fragment size comes from BR_INPUT_SKB_CB(skb)->frag_max_size, which
 * must lie between IPV6_MIN_MTU and skb->dev->mtu.  The timestamp and
 * mono_delivery_time flag of @skb are saved up front and re-applied to every
 * fragment before it is output.
 *
 * Two strategies are used:
 *  - if @skb carries a frag list whose geometry already fits, the list
 *    members are sent as the fragments (ip6_fraglist_* helpers);
 *  - otherwise ("slow_path") new fragments are produced with
 *    ip6_frag_init()/ip6_frag_next().
 *
 * Return: 0 when the packet was sent, and also when it was dropped through
 * the "blackhole" label; the error from @output when it fails while sending
 * in the frag-list path.
 */
int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		    struct nf_bridge_frag_data *data,
		    int (*output)(struct net *, struct sock *sk,
				  const struct nf_bridge_frag_data *data,
				  struct sk_buff *))
{
	int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
	bool mono_delivery_time = skb->mono_delivery_time;
	ktime_t tstamp = skb->tstamp;
	struct ip6_frag_state state;
	u8 *prevhdr, nexthdr = 0;
	unsigned int mtu, hlen;
	int hroom, err = 0;
	__be32 frag_id;

	/* A non-negative result is used as the header length (hlen). */
	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto blackhole;
	hlen = err;
	nexthdr = *prevhdr;

	/* frag_max_size must be within [IPV6_MIN_MTU, device MTU]. */
	mtu = skb->dev->mtu;
	if (frag_max_size > mtu ||
	    frag_max_size < IPV6_MIN_MTU)
		goto blackhole;

	/*
	 * From here on mtu is the payload budget per fragment: the fragment
	 * size minus the headers and the fragment header.  At least 8 bytes
	 * of payload must remain.
	 */
	mtu = frag_max_size;
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto blackhole;
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	/* Resolve a pending partial checksum before splitting the packet. */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto blackhole;

	hroom = LL_RESERVED_SPACE(skb->dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct ip6_fraglist_iter iter;
		struct sk_buff *frag2;

		/*
		 * The head must fit the payload budget and have room for the
		 * link-layer reserve plus a fragment header.
		 */
		if (first_len - hlen > mtu ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto blackhole;

		if (skb_cloned(skb))
			goto slow_path;

		/*
		 * Every list member must fit the budget and have headroom for
		 * the headers, the link-layer reserve and a fragment header.
		 */
		skb_walk_frags(skb, frag2) {
			if (frag2->len > mtu ||
			    skb_headroom(frag2) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto blackhole;

			/* Partially cloned skb? */
			if (skb_shared(frag2))
				goto slow_path;
		}

		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
					&iter);
		if (err < 0)
			goto blackhole;

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down.
			 */
			if (iter.frag)
				ip6_fraglist_prepare(skb, &iter);

			skb_set_delivery_time(skb, tstamp, mono_delivery_time);
			err = output(net, sk, data, skb);
			if (err || !iter.frag)
				break;

			/* skb now refers to the next fragment to send. */
			skb = ip6_fraglist_next(&iter);
		}

		kfree(iter.tmp_hdr);
		if (!err)
			return 0;

		/* Output failed: free the fragments that were not sent yet. */
		kfree_skb_list(iter.frag);
		return err;
	}
slow_path:
	/* This is a linearized skbuff, the original geometry is lost for us.
	 * This may also be a clone skbuff, we could preserve the geometry for
	 * the copies but probably not worth the effort.
	 */
	ip6_frag_init(skb, hlen, mtu, skb->dev->needed_tailroom,
		      LL_RESERVED_SPACE(skb->dev), prevhdr, nexthdr, frag_id,
		      &state);

	while (state.left > 0) {
		struct sk_buff *skb2;

		skb2 = ip6_frag_next(skb, &state);
		if (IS_ERR(skb2)) {
			err = PTR_ERR(skb2);
			goto blackhole;
		}

		skb_set_delivery_time(skb2, tstamp, mono_delivery_time);
		err = output(net, sk, data, skb2);
		if (err)
			goto blackhole;
	}
	/*
	 * All fragments went out; the original skb is no longer needed.
	 * err is 0 here whenever the loop ran at least once.
	 * NOTE(review): if the loop never runs, err still holds its earlier
	 * value (hlen from ip6_find_1stfragopt() unless skb_checksum_help()
	 * overwrote it) -- confirm state.left is always > 0 initially.
	 */
	consume_skb(skb);
	return err;

blackhole:
	/* Drop the packet; the failure is not reported to the caller. */
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(br_ip6_fragment);
236 
237 static const struct nf_ipv6_ops ipv6ops = {
238 #if IS_MODULE(CONFIG_IPV6)
239 	.chk_addr		= ipv6_chk_addr,
240 	.route_me_harder	= ip6_route_me_harder,
241 	.dev_get_saddr		= ipv6_dev_get_saddr,
242 	.route			= __nf_ip6_route,
243 #if IS_ENABLED(CONFIG_SYN_COOKIES)
244 	.cookie_init_sequence	= __cookie_v6_init_sequence,
245 	.cookie_v6_check	= __cookie_v6_check,
246 #endif
247 #endif
248 	.route_input		= ip6_route_input,
249 	.fragment		= ip6_fragment,
250 	.reroute		= nf_ip6_reroute,
251 #if IS_MODULE(CONFIG_IPV6)
252 	.br_fragment		= br_ip6_fragment,
253 #endif
254 };
255 
/*
 * Publish the IPv6 callback table by pointing nf_ipv6_ops at ipv6ops.
 *
 * Return: always 0.
 */
int __init ipv6_netfilter_init(void)
{
	RCU_INIT_POINTER(nf_ipv6_ops, &ipv6ops);
	return 0;
}
261 
/* This can be called from inet6_init() on errors, so it cannot
 * be marked __exit. -DaveM
 *
 * Withdraws the IPv6 callback table by resetting nf_ipv6_ops to NULL.
 */
void ipv6_netfilter_fini(void)
{
	RCU_INIT_POINTER(nf_ipv6_ops, NULL);
}
269