/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/types.h>
#include <linux/ip.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_bridge.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/icmp.h>
#include <linux/sysctl.h>
#include <net/route.h>
#include <net/ip.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_bridge.h>

#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>

#include "../br_private.h"

/* Best-effort variant of ip_do_fragment() which preserves geometry, unless
 * the skbuff has been linearized or cloned.
 */
static int nf_br_ip_fragment(struct net *net, struct sock *sk,
			     struct sk_buff *skb,
			     struct nf_bridge_frag_data *data,
			     int (*output)(struct net *, struct sock *sk,
					   const struct nf_bridge_frag_data *data,
					   struct sk_buff *))
{
	int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
	bool mono_delivery_time = skb->mono_delivery_time;
	unsigned int hlen, ll_rs, mtu;
	ktime_t tstamp = skb->tstamp;
	struct ip_frag_state state;
	struct iphdr *iph;
	int err = 0;

	/* For offloaded checksums, resolve the checksum before fragmentation. */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto blackhole;

	iph = ip_hdr(skb);

	/*
	 *	Set up starting values.
	 */

	hlen = iph->ihl * 4;
	frag_max_size -= hlen;
	ll_rs = LL_RESERVED_SPACE(skb->dev);
	mtu = skb->dev->mtu;

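	/* Fast path: the skbuff still carries the original fragment geometry
	 * as a frag list, so each fragment can be re-emitted as-is.
	 */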
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct ip_fraglist_iter iter;
		struct sk_buff *frag;

		if (first_len - hlen > mtu ||
		    skb_headroom(skb) < ll_rs)
			goto blackhole;

		if (skb_cloned(skb))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			if (frag->len > mtu ||
			    skb_headroom(frag) < hlen + ll_rs)
				goto blackhole;

			if (skb_shared(frag))
				goto slow_path;
		}

		ip_fraglist_init(skb, iph, hlen, &iter);

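		/* Walk the frag list: ip_fraglist_prepare() fixes up the next
		 * fragment's IP header before it is handed to the output
		 * callback.
		 */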
		for (;;) {
			if (iter.frag)
				ip_fraglist_prepare(skb, &iter);

			skb_set_delivery_time(skb, tstamp, mono_delivery_time);
			err = output(net, sk, data, skb);
			if (err || !iter.frag)
				break;

			skb = ip_fraglist_next(&iter);
		}

		if (!err)
			return 0;

		kfree_skb_list(iter.frag);

		return err;
	}
slow_path:
	/* This is a linearized skbuff; the original geometry is lost to us.
	 * It may also be a cloned skbuff; we could preserve the geometry for
	 * the copies, but it is probably not worth the effort.
	 */
	ip_frag_init(skb, hlen, ll_rs, frag_max_size, false, &state);

	while (state.left > 0) {
		struct sk_buff *skb2;

		skb2 = ip_frag_next(skb, &state);
		if (IS_ERR(skb2)) {
			err = PTR_ERR(skb2);
			goto blackhole;
		}

		skb_set_delivery_time(skb2, tstamp, mono_delivery_time);
		err = output(net, sk, data, skb2);
		if (err)
			goto blackhole;
	}
	consume_skb(skb);
	return err;

blackhole:
	kfree_skb(skb);
	return 0;
}

/* ip_defrag() expects IPCB() in place. */
static void br_skb_cb_save(struct sk_buff *skb, struct br_input_skb_cb *cb,
			   size_t inet_skb_parm_size)
{
	memcpy(cb, skb->cb, sizeof(*cb));
	memset(skb->cb, 0, inet_skb_parm_size);
}

static void br_skb_cb_restore(struct sk_buff *skb,
			      const struct br_input_skb_cb *cb,
			      u16 fragsz)
{
	memcpy(skb->cb, cb, sizeof(*cb));
	BR_INPUT_SKB_CB(skb)->frag_max_size = fragsz;
}

static unsigned int nf_ct_br_defrag4(struct sk_buff *skb,
				     const struct nf_hook_state *state)
{
	u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
	enum ip_conntrack_info ctinfo;
	struct br_input_skb_cb cb;
	const struct nf_conn *ct;
	int err;

	if (!ip_is_fragment(ip_hdr(skb)))
		return NF_ACCEPT;

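	/* Pick up the conntrack zone, e.g. from a template attached by the
	 * ruleset, so that each zone gets its own reassembly queues.
	 */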
	ct = nf_ct_get(skb, &ctinfo);
	if (ct)
		zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));

	br_skb_cb_save(skb, &cb, sizeof(struct inet_skb_parm));
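	/* ip_defrag() is expected to run with BHs disabled. */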
	local_bh_disable();
	err = ip_defrag(state->net, skb,
			IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id);
	local_bh_enable();
	if (!err) {
		br_skb_cb_restore(skb, &cb, IPCB(skb)->frag_max_size);
		skb->ignore_df = 1;
		return NF_ACCEPT;
	}

	return NF_STOLEN;
}

static unsigned int nf_ct_br_defrag6(struct sk_buff *skb,
				     const struct nf_hook_state *state)
{
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
	u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
	enum ip_conntrack_info ctinfo;
	struct br_input_skb_cb cb;
	const struct nf_conn *ct;
	int err;

	ct = nf_ct_get(skb, &ctinfo);
	if (ct)
		zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));

	br_skb_cb_save(skb, &cb, sizeof(struct inet6_skb_parm));

	err = nf_ct_frag6_gather(state->net, skb,
				 IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id);
	/* queued */
	if (err == -EINPROGRESS)
		return NF_STOLEN;

	br_skb_cb_restore(skb, &cb, IP6CB(skb)->frag_max_size);
	return err == 0 ? NF_ACCEPT : NF_DROP;
#else
	return NF_ACCEPT;
#endif
}

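/* Basic sanity checks on the IPv4 header before conntrack sees the packet:
 * header length, version, and a total length consistent with the skbuff.
 */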
static int nf_ct_br_ip_check(const struct sk_buff *skb)
{
	const struct iphdr *iph;
	int nhoff, len;

	nhoff = skb_network_offset(skb);
	iph = ip_hdr(skb);
	if (iph->ihl < 5 ||
	    iph->version != 4)
		return -1;

	len = skb_ip_totlen(skb);
	if (skb->len < nhoff + len ||
	    len < (iph->ihl * 4))
		return -1;

	return 0;
}

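/* The IPv6 counterpart: check the version and that the payload length fits
 * within the skbuff.
 */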
static int nf_ct_br_ipv6_check(const struct sk_buff *skb)
{
	const struct ipv6hdr *hdr;
	int nhoff, len;

	nhoff = skb_network_offset(skb);
	hdr = ipv6_hdr(skb);
	if (hdr->version != 6)
		return -1;

	len = ntohs(hdr->payload_len) + sizeof(struct ipv6hdr) + nhoff;
	if (skb->len < len)
		return -1;

	return 0;
}

static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
				     const struct nf_hook_state *state)
{
	struct nf_hook_state bridge_state = *state;
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;
	u32 len;
	int ret;

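	/* Packets that are already tracked, or explicitly untracked, need no
	 * further work here.
	 */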
	ct = nf_ct_get(skb, &ctinfo);
	if ((ct && !nf_ct_is_template(ct)) ||
	    ctinfo == IP_CT_UNTRACKED)
		return NF_ACCEPT;

	switch (skb->protocol) {
	case htons(ETH_P_IP):
		if (!pskb_may_pull(skb, sizeof(struct iphdr)))
			return NF_ACCEPT;

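		/* Trim anything below the IP total length, such as
		 * link-layer padding on short ethernet frames.
		 */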
		len = skb_ip_totlen(skb);
		if (pskb_trim_rcsum(skb, len))
			return NF_ACCEPT;

		if (nf_ct_br_ip_check(skb))
			return NF_ACCEPT;

		bridge_state.pf = NFPROTO_IPV4;
		ret = nf_ct_br_defrag4(skb, &bridge_state);
		break;
	case htons(ETH_P_IPV6):
		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			return NF_ACCEPT;

		len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
		if (pskb_trim_rcsum(skb, len))
			return NF_ACCEPT;

		if (nf_ct_br_ipv6_check(skb))
			return NF_ACCEPT;

		bridge_state.pf = NFPROTO_IPV6;
		ret = nf_ct_br_defrag6(skb, &bridge_state);
		break;
	default:
		nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
		return NF_ACCEPT;
	}

	if (ret != NF_ACCEPT)
		return ret;

	return nf_conntrack_in(skb, &bridge_state);
}

static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb,
				    const struct nf_hook_state *state)
{
	bool promisc = BR_INPUT_SKB_CB(skb)->promisc;
	struct nf_conntrack *nfct = skb_nfct(skb);
	struct nf_conn *ct;

	if (promisc) {
		nf_reset_ct(skb);
		return NF_ACCEPT;
	}

	if (!nfct || skb->pkt_type == PACKET_HOST)
		return NF_ACCEPT;

	/* nf_conntrack_confirm() cannot handle concurrent clones, which
	 * happens for broadcast/multicast frames with e.g. macvlan on top
	 * of the bridge device.
	 */
	ct = container_of(nfct, struct nf_conn, ct_general);
	if (nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
		return NF_ACCEPT;

	/* let inet prerouting call conntrack again */
	skb->_nfct = 0;
	nf_ct_put(ct);

	return NF_ACCEPT;
}

static void nf_ct_bridge_frag_save(struct sk_buff *skb,
				   struct nf_bridge_frag_data *data)
{
	if (skb_vlan_tag_present(skb)) {
		data->vlan_present = true;
		data->vlan_tci = skb->vlan_tci;
		data->vlan_proto = skb->vlan_proto;
	} else {
		data->vlan_present = false;
	}
	skb_copy_from_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN);
}

static unsigned int
nf_ct_bridge_refrag(struct sk_buff *skb, const struct nf_hook_state *state,
		    int (*output)(struct net *, struct sock *sk,
				  const struct nf_bridge_frag_data *data,
				  struct sk_buff *))
{
	struct nf_bridge_frag_data data;

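	/* frag_max_size is only set when the packet was defragmented on
	 * ingress; anything else can go out untouched.
	 */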
	if (!BR_INPUT_SKB_CB(skb)->frag_max_size)
		return NF_ACCEPT;

	nf_ct_bridge_frag_save(skb, &data);
	switch (skb->protocol) {
	case htons(ETH_P_IP):
		nf_br_ip_fragment(state->net, state->sk, skb, &data, output);
		break;
	case htons(ETH_P_IPV6):
		nf_br_ip6_fragment(state->net, state->sk, skb, &data, output);
		break;
	default:
		WARN_ON_ONCE(1);
		return NF_DROP;
	}

	return NF_STOLEN;
}

/* Only slow-path refragmentation actually needs this. */
static int nf_ct_bridge_frag_restore(struct sk_buff *skb,
				     const struct nf_bridge_frag_data *data)
{
	int err;

	err = skb_cow_head(skb, ETH_HLEN);
	if (err) {
		kfree_skb(skb);
		return -ENOMEM;
	}
	if (data->vlan_present)
		__vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci);
	else if (skb_vlan_tag_present(skb))
		__vlan_hwaccel_clear_tag(skb);

	skb_copy_to_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN);
	skb_reset_mac_header(skb);

	return 0;
}

static int nf_ct_bridge_refrag_post(struct net *net, struct sock *sk,
				    const struct nf_bridge_frag_data *data,
				    struct sk_buff *skb)
{
	int err;

	err = nf_ct_bridge_frag_restore(skb, data);
	if (err < 0)
		return err;

	return br_dev_queue_push_xmit(net, sk, skb);
}

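/* Confirm the conntrack entry, then refragment if the packet was
 * defragmented on ingress.
 */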
static unsigned int nf_ct_bridge_post(void *priv, struct sk_buff *skb,
				      const struct nf_hook_state *state)
{
	int ret;

	ret = nf_confirm(priv, skb, state);
	if (ret != NF_ACCEPT)
		return ret;

	return nf_ct_bridge_refrag(skb, state, nf_ct_bridge_refrag_post);
}

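/* Hook the bridge at PRE_ROUTING (sanity checks, defrag and conntrack
 * lookup), LOCAL_IN (detach unconfirmed entries so the inet hooks can look
 * up conntrack again) and POST_ROUTING (confirm, then refragment).
 */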
static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = {
	{
		.hook		= nf_ct_bridge_pre,
		.pf		= NFPROTO_BRIDGE,
		.hooknum	= NF_BR_PRE_ROUTING,
		.priority	= NF_IP_PRI_CONNTRACK,
	},
	{
		.hook		= nf_ct_bridge_in,
		.pf		= NFPROTO_BRIDGE,
		.hooknum	= NF_BR_LOCAL_IN,
		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
	},
	{
		.hook		= nf_ct_bridge_post,
		.pf		= NFPROTO_BRIDGE,
		.hooknum	= NF_BR_POST_ROUTING,
		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
	},
};

static struct nf_ct_bridge_info bridge_info = {
	.ops		= nf_ct_bridge_hook_ops,
	.ops_size	= ARRAY_SIZE(nf_ct_bridge_hook_ops),
	.me		= THIS_MODULE,
};

static int __init nf_conntrack_l3proto_bridge_init(void)
{
	nf_ct_bridge_register(&bridge_info);

	return 0;
}

static void __exit nf_conntrack_l3proto_bridge_fini(void)
{
	nf_ct_bridge_unregister(&bridge_info);
}

module_init(nf_conntrack_l3proto_bridge_init);
module_exit(nf_conntrack_l3proto_bridge_fini);

MODULE_ALIAS("nf_conntrack-" __stringify(AF_BRIDGE));
MODULE_LICENSE("GPL");