1 /*
2  *	Handle firewalling
3  *	Linux ethernet bridge
4  *
5  *	Authors:
6  *	Lennert Buytenhek		<buytenh@gnu.org>
7  *	Bart De Schuymer		<bdschuym@pandora.be>
8  *
9  *	This program is free software; you can redistribute it and/or
10  *	modify it under the terms of the GNU General Public License
11  *	as published by the Free Software Foundation; either version
12  *	2 of the License, or (at your option) any later version.
13  *
14  *	Lennert dedicates this file to Kerstin Wurdinger.
15  */
16 
17 #include <linux/module.h>
18 #include <linux/kernel.h>
19 #include <linux/slab.h>
20 #include <linux/ip.h>
21 #include <linux/netdevice.h>
22 #include <linux/skbuff.h>
23 #include <linux/if_arp.h>
24 #include <linux/if_ether.h>
25 #include <linux/if_vlan.h>
26 #include <linux/if_pppox.h>
27 #include <linux/ppp_defs.h>
28 #include <linux/netfilter_bridge.h>
29 #include <linux/netfilter_ipv4.h>
30 #include <linux/netfilter_ipv6.h>
31 #include <linux/netfilter_arp.h>
32 #include <linux/in_route.h>
33 #include <linux/inetdevice.h>
34 
35 #include <net/ip.h>
36 #include <net/ipv6.h>
37 #include <net/addrconf.h>
38 #include <net/route.h>
39 #include <net/netfilter/br_netfilter.h>
40 
41 #include <asm/uaccess.h>
42 #include "br_private.h"
43 #ifdef CONFIG_SYSCTL
44 #include <linux/sysctl.h>
45 #endif
46 
#ifdef CONFIG_SYSCTL
/* Handle returned by register_net_sysctl() in br_netfilter_init(); kept so
 * that br_netfilter_fini() can unregister the table again.
 */
static struct ctl_table_header *brnf_sysctl_header;
/* Run-time tunables exported through brnf_table[].  The call_* knobs
 * default to on (1); the filter_* and pass_* knobs default to off (0).
 */
static int brnf_call_iptables __read_mostly = 1;
static int brnf_call_ip6tables __read_mostly = 1;
static int brnf_call_arptables __read_mostly = 1;
static int brnf_filter_vlan_tagged __read_mostly = 0;
static int brnf_filter_pppoe_tagged __read_mostly = 0;
static int brnf_pass_vlan_indev __read_mostly = 0;
#else
/* Without sysctl support the knobs collapse to compile-time constants
 * carrying the same default values.
 */
#define brnf_call_iptables 1
#define brnf_call_ip6tables 1
#define brnf_call_arptables 1
#define brnf_filter_vlan_tagged 0
#define brnf_filter_pppoe_tagged 0
#define brnf_pass_vlan_indev 0
#endif
63 
/* Untagged protocol tests: true when skb_vlan_tag_present() is false and
 * skb->protocol is IPv4, IPv6 or ARP respectively.
 */
#define IS_IP(skb) \
	(!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IP))

#define IS_IPV6(skb) \
	(!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IPV6))

#define IS_ARP(skb) \
	(!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_ARP))
72 
73 static inline __be16 vlan_proto(const struct sk_buff *skb)
74 {
75 	if (skb_vlan_tag_present(skb))
76 		return skb->protocol;
77 	else if (skb->protocol == htons(ETH_P_8021Q))
78 		return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
79 	else
80 		return 0;
81 }
82 
/* VLAN-encapsulated protocol tests: true when vlan_proto() reports IPv4,
 * IPv6 or ARP respectively AND the brnf_filter_vlan_tagged knob is set.
 */
#define IS_VLAN_IP(skb) \
	(vlan_proto(skb) == htons(ETH_P_IP) && \
	 brnf_filter_vlan_tagged)

#define IS_VLAN_IPV6(skb) \
	(vlan_proto(skb) == htons(ETH_P_IPV6) && \
	 brnf_filter_vlan_tagged)

#define IS_VLAN_ARP(skb) \
	(vlan_proto(skb) == htons(ETH_P_ARP) &&	\
	 brnf_filter_vlan_tagged)
94 
95 static inline __be16 pppoe_proto(const struct sk_buff *skb)
96 {
97 	return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
98 			    sizeof(struct pppoe_hdr)));
99 }
100 
/* PPPoE-encapsulated protocol tests: true when skb->protocol is
 * ETH_P_PPP_SES, pppoe_proto() reports PPP_IP / PPP_IPV6 respectively, AND
 * the brnf_filter_pppoe_tagged knob is set.
 */
#define IS_PPPOE_IP(skb) \
	(skb->protocol == htons(ETH_P_PPP_SES) && \
	 pppoe_proto(skb) == htons(PPP_IP) && \
	 brnf_filter_pppoe_tagged)

#define IS_PPPOE_IPV6(skb) \
	(skb->protocol == htons(ETH_P_PPP_SES) && \
	 pppoe_proto(skb) == htons(PPP_IPV6) && \
	 brnf_filter_pppoe_tagged)
110 
/* largest possible L2 header, see br_nf_dev_queue_xmit() */
#define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN)

#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
/* L2 state saved by br_nf_dev_queue_xmit() before a packet is handed to the
 * fragmentation code, and written back onto each skb passed to
 * br_nf_push_frag_xmit().
 */
struct brnf_frag_data {
	/* copy of the 'size' bytes that precede skb->data */
	char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH];
	/* nf_bridge_encap_header_len() of the original skb */
	u8 encap_size;
	/* ETH_HLEN + encap_size: number of valid bytes in mac[] */
	u8 size;
	/* skb->vlan_tci / skb->vlan_proto of the original skb; only the IPv4
	 * branch of br_nf_dev_queue_xmit() stores them
	 */
	u16 vlan_tci;
	__be16 vlan_proto;
};

/* One scratch slot per CPU, accessed through this_cpu_ptr(). */
static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage);
#endif
125 
126 static void nf_bridge_info_free(struct sk_buff *skb)
127 {
128 	if (skb->nf_bridge) {
129 		nf_bridge_put(skb->nf_bridge);
130 		skb->nf_bridge = NULL;
131 	}
132 }
133 
134 static inline struct net_device *bridge_parent(const struct net_device *dev)
135 {
136 	struct net_bridge_port *port;
137 
138 	port = br_port_get_rcu(dev);
139 	return port ? port->br->dev : NULL;
140 }
141 
142 static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb)
143 {
144 	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
145 
146 	if (atomic_read(&nf_bridge->use) > 1) {
147 		struct nf_bridge_info *tmp = nf_bridge_alloc(skb);
148 
149 		if (tmp) {
150 			memcpy(tmp, nf_bridge, sizeof(struct nf_bridge_info));
151 			atomic_set(&tmp->use, 1);
152 		}
153 		nf_bridge_put(nf_bridge);
154 		nf_bridge = tmp;
155 	}
156 	return nf_bridge;
157 }
158 
159 unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
160 {
161 	switch (skb->protocol) {
162 	case __cpu_to_be16(ETH_P_8021Q):
163 		return VLAN_HLEN;
164 	case __cpu_to_be16(ETH_P_PPP_SES):
165 		return PPPOE_SES_HLEN;
166 	default:
167 		return 0;
168 	}
169 }
170 
171 static inline void nf_bridge_pull_encap_header(struct sk_buff *skb)
172 {
173 	unsigned int len = nf_bridge_encap_header_len(skb);
174 
175 	skb_pull(skb, len);
176 	skb->network_header += len;
177 }
178 
179 static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb)
180 {
181 	unsigned int len = nf_bridge_encap_header_len(skb);
182 
183 	skb_pull_rcsum(skb, len);
184 	skb->network_header += len;
185 }
186 
187 /* When handing a packet over to the IP layer
188  * check whether we have a skb that is in the
189  * expected format
190  */
191 
192 static int br_validate_ipv4(struct sk_buff *skb)
193 {
194 	const struct iphdr *iph;
195 	struct net_device *dev = skb->dev;
196 	u32 len;
197 
198 	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
199 		goto inhdr_error;
200 
201 	iph = ip_hdr(skb);
202 
203 	/* Basic sanity checks */
204 	if (iph->ihl < 5 || iph->version != 4)
205 		goto inhdr_error;
206 
207 	if (!pskb_may_pull(skb, iph->ihl*4))
208 		goto inhdr_error;
209 
210 	iph = ip_hdr(skb);
211 	if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
212 		goto inhdr_error;
213 
214 	len = ntohs(iph->tot_len);
215 	if (skb->len < len) {
216 		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INTRUNCATEDPKTS);
217 		goto drop;
218 	} else if (len < (iph->ihl*4))
219 		goto inhdr_error;
220 
221 	if (pskb_trim_rcsum(skb, len)) {
222 		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
223 		goto drop;
224 	}
225 
226 	memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
227 	/* We should really parse IP options here but until
228 	 * somebody who actually uses IP options complains to
229 	 * us we'll just silently ignore the options because
230 	 * we're lazy!
231 	 */
232 	return 0;
233 
234 inhdr_error:
235 	IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
236 drop:
237 	return -1;
238 }
239 
240 void nf_bridge_update_protocol(struct sk_buff *skb)
241 {
242 	switch (skb->nf_bridge->orig_proto) {
243 	case BRNF_PROTO_8021Q:
244 		skb->protocol = htons(ETH_P_8021Q);
245 		break;
246 	case BRNF_PROTO_PPPOE:
247 		skb->protocol = htons(ETH_P_PPP_SES);
248 		break;
249 	case BRNF_PROTO_UNCHANGED:
250 		break;
251 	}
252 }
253 
254 /* Obtain the correct destination MAC address, while preserving the original
255  * source MAC address. If we already know this address, we just copy it. If we
256  * don't, we use the neighbour framework to find out. In both cases, we make
257  * sure that br_handle_frame_finish() is called afterwards.
258  */
259 int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb)
260 {
261 	struct neighbour *neigh;
262 	struct dst_entry *dst;
263 
264 	skb->dev = bridge_parent(skb->dev);
265 	if (!skb->dev)
266 		goto free_skb;
267 	dst = skb_dst(skb);
268 	neigh = dst_neigh_lookup_skb(dst, skb);
269 	if (neigh) {
270 		struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
271 		int ret;
272 
273 		if (neigh->hh.hh_len) {
274 			neigh_hh_bridge(&neigh->hh, skb);
275 			skb->dev = nf_bridge->physindev;
276 			ret = br_handle_frame_finish(sk, skb);
277 		} else {
278 			/* the neighbour function below overwrites the complete
279 			 * MAC header, so we save the Ethernet source address and
280 			 * protocol number.
281 			 */
282 			skb_copy_from_linear_data_offset(skb,
283 							 -(ETH_HLEN-ETH_ALEN),
284 							 nf_bridge->neigh_header,
285 							 ETH_HLEN-ETH_ALEN);
286 			/* tell br_dev_xmit to continue with forwarding */
287 			nf_bridge->mask |= BRNF_BRIDGED_DNAT;
288 			/* FIXME Need to refragment */
289 			ret = neigh->output(neigh, skb);
290 		}
291 		neigh_release(neigh);
292 		return ret;
293 	}
294 free_skb:
295 	kfree_skb(skb);
296 	return 0;
297 }
298 
299 static inline bool
300 br_nf_ipv4_daddr_was_changed(const struct sk_buff *skb,
301 			     const struct nf_bridge_info *nf_bridge)
302 {
303 	return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr;
304 }
305 
306 /* This requires some explaining. If DNAT has taken place,
307  * we will need to fix up the destination Ethernet address.
308  * This is also true when SNAT takes place (for the reply direction).
309  *
310  * There are two cases to consider:
311  * 1. The packet was DNAT'ed to a device in the same bridge
312  *    port group as it was received on. We can still bridge
313  *    the packet.
314  * 2. The packet was DNAT'ed to a different device, either
315  *    a non-bridged device or another bridge port group.
316  *    The packet will need to be routed.
317  *
318  * The correct way of distinguishing between these two cases is to
319  * call ip_route_input() and to look at skb->dst->dev, which is
320  * changed to the destination device if ip_route_input() succeeds.
321  *
322  * Let's first consider the case that ip_route_input() succeeds:
323  *
324  * If the output device equals the logical bridge device the packet
325  * came in on, we can consider this bridging. The corresponding MAC
326  * address will be obtained in br_nf_pre_routing_finish_bridge.
327  * Otherwise, the packet is considered to be routed and we just
328  * change the destination MAC address so that the packet will
329  * later be passed up to the IP stack to be routed. For a redirected
330  * packet, ip_route_input() will give back the localhost as output device,
331  * which differs from the bridge device.
332  *
333  * Let's now consider the case that ip_route_input() fails:
334  *
335  * This can be because the destination address is martian, in which case
336  * the packet will be dropped.
337  * If IP forwarding is disabled, ip_route_input() will fail, while
338  * ip_route_output_key() can return success. The source
339  * address for ip_route_output_key() is set to zero, so ip_route_output_key()
340  * thinks we're handling a locally generated packet and won't care
341  * if IP forwarding is enabled. If the output device equals the logical bridge
342  * device, we proceed as if ip_route_input() succeeded. If it differs from the
343  * logical bridge port or if ip_route_output_key() fails we drop the packet.
344  */
static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct iphdr *iph = ip_hdr(skb);
	struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
	struct rtable *rt;
	int err;

	/* remember the fragment size limit recorded in the IP control block */
	nf_bridge->frag_max_size = IPCB(skb)->frag_max_size;

	/* undo the pkt_type change made by setup_pre_routing() */
	if (nf_bridge->pkt_otherhost) {
		skb->pkt_type = PACKET_OTHERHOST;
		nf_bridge->pkt_otherhost = false;
	}
	nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING;
	if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) {
		/* destination address was rewritten: need a routing decision */
		if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) {
			struct in_device *in_dev = __in_dev_get_rcu(dev);

			/* If err equals -EHOSTUNREACH the error is due to a
			 * martian destination or due to the fact that
			 * forwarding is disabled. For most martian packets,
			 * ip_route_output_key() will fail. It won't fail for 2 types of
			 * martian destinations: loopback destinations and destination
			 * 0.0.0.0. In both cases the packet will be dropped because the
			 * destination is the loopback device and not the bridge. */
			if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev))
				goto free_skb;

			rt = ip_route_output(dev_net(dev), iph->daddr, 0,
					     RT_TOS(iph->tos), 0);
			if (!IS_ERR(rt)) {
				/* - Bridged-and-DNAT'ed traffic doesn't
				 *   require ip_forwarding. */
				if (rt->dst.dev == dev) {
					skb_dst_set(skb, &rt->dst);
					goto bridged_dnat;
				}
				/* route leaves via another device: drop the route */
				ip_rt_put(rt);
			}
free_skb:
			kfree_skb(skb);
			return 0;
		} else {
			if (skb_dst(skb)->dev == dev) {
				/* output device is the bridge itself: keep bridging;
				 * also entered via goto from the output-route case
				 * above
				 */
bridged_dnat:
				skb->dev = nf_bridge->physindev;
				nf_bridge_update_protocol(skb);
				nf_bridge_push_encap_header(skb);
				NF_HOOK_THRESH(NFPROTO_BRIDGE,
					       NF_BR_PRE_ROUTING,
					       sk, skb, skb->dev, NULL,
					       br_nf_pre_routing_finish_bridge,
					       1);
				return 0;
			}
			/* routed case: address the frame to the bridge device */
			ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr);
			skb->pkt_type = PACKET_HOST;
		}
	} else {
		/* destination unchanged: attach the rtable returned by
		 * bridge_parent_rtable() without taking a reference
		 */
		rt = bridge_parent_rtable(nf_bridge->physindev);
		if (!rt) {
			kfree_skb(skb);
			return 0;
		}
		skb_dst_set_noref(skb, &rt->dst);
	}

	/* restore the physical input device and L2 encapsulation, then
	 * continue the bridge PRE_ROUTING hooks
	 */
	skb->dev = nf_bridge->physindev;
	nf_bridge_update_protocol(skb);
	nf_bridge_push_encap_header(skb);
	NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb,
		       skb->dev, NULL,
		       br_handle_frame_finish, 1);

	return 0;
}
422 
423 static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct net_device *dev)
424 {
425 	struct net_device *vlan, *br;
426 
427 	br = bridge_parent(dev);
428 	if (brnf_pass_vlan_indev == 0 || !skb_vlan_tag_present(skb))
429 		return br;
430 
431 	vlan = __vlan_find_dev_deep_rcu(br, skb->vlan_proto,
432 				    skb_vlan_tag_get(skb) & VLAN_VID_MASK);
433 
434 	return vlan ? vlan : br;
435 }
436 
437 /* Some common code for IPv4/IPv6 */
438 struct net_device *setup_pre_routing(struct sk_buff *skb)
439 {
440 	struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
441 
442 	if (skb->pkt_type == PACKET_OTHERHOST) {
443 		skb->pkt_type = PACKET_HOST;
444 		nf_bridge->pkt_otherhost = true;
445 	}
446 
447 	nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING;
448 	nf_bridge->physindev = skb->dev;
449 	skb->dev = brnf_get_logical_dev(skb, skb->dev);
450 
451 	if (skb->protocol == htons(ETH_P_8021Q))
452 		nf_bridge->orig_proto = BRNF_PROTO_8021Q;
453 	else if (skb->protocol == htons(ETH_P_PPP_SES))
454 		nf_bridge->orig_proto = BRNF_PROTO_PPPOE;
455 
456 	/* Must drop socket now because of tproxy. */
457 	skb_orphan(skb);
458 	return skb->dev;
459 }
460 
461 /* Direct IPv6 traffic to br_nf_pre_routing_ipv6.
462  * Replicate the checks that IPv4 does on packet reception.
463  * Set skb->dev to the bridge device (i.e. parent of the
464  * receiving device) to make netfilter happy, the REDIRECT
465  * target in particular.  Save the original destination IP
466  * address to be able to detect DNAT afterwards. */
467 static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops,
468 				      struct sk_buff *skb,
469 				      const struct nf_hook_state *state)
470 {
471 	struct nf_bridge_info *nf_bridge;
472 	struct net_bridge_port *p;
473 	struct net_bridge *br;
474 	__u32 len = nf_bridge_encap_header_len(skb);
475 
476 	if (unlikely(!pskb_may_pull(skb, len)))
477 		return NF_DROP;
478 
479 	p = br_port_get_rcu(state->in);
480 	if (p == NULL)
481 		return NF_DROP;
482 	br = p->br;
483 
484 	if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb)) {
485 		if (!brnf_call_ip6tables && !br->nf_call_ip6tables)
486 			return NF_ACCEPT;
487 
488 		nf_bridge_pull_encap_header_rcsum(skb);
489 		return br_nf_pre_routing_ipv6(ops, skb, state);
490 	}
491 
492 	if (!brnf_call_iptables && !br->nf_call_iptables)
493 		return NF_ACCEPT;
494 
495 	if (!IS_IP(skb) && !IS_VLAN_IP(skb) && !IS_PPPOE_IP(skb))
496 		return NF_ACCEPT;
497 
498 	nf_bridge_pull_encap_header_rcsum(skb);
499 
500 	if (br_validate_ipv4(skb))
501 		return NF_DROP;
502 
503 	nf_bridge_put(skb->nf_bridge);
504 	if (!nf_bridge_alloc(skb))
505 		return NF_DROP;
506 	if (!setup_pre_routing(skb))
507 		return NF_DROP;
508 
509 	nf_bridge = nf_bridge_info_get(skb);
510 	nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr;
511 
512 	skb->protocol = htons(ETH_P_IP);
513 
514 	NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->sk, skb,
515 		skb->dev, NULL,
516 		br_nf_pre_routing_finish);
517 
518 	return NF_STOLEN;
519 }
520 
521 
522 /* PF_BRIDGE/LOCAL_IN ************************************************/
523 /* The packet is locally destined, which requires a real
524  * dst_entry, so detach the fake one.  On the way up, the
525  * packet would pass through PRE_ROUTING again (which already
526  * took place when the packet entered the bridge), but we
527  * register an IPv4 PRE_ROUTING 'sabotage' hook that will
528  * prevent this from happening. */
static unsigned int br_nf_local_in(const struct nf_hook_ops *ops,
				   struct sk_buff *skb,
				   const struct nf_hook_state *state)
{
	/* ops and state are unused; detach the fake rtable and always let
	 * the packet continue
	 */
	br_drop_fake_rtable(skb);
	return NF_ACCEPT;
}
536 
537 /* PF_BRIDGE/FORWARD *************************************************/
538 static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb)
539 {
540 	struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
541 	struct net_device *in;
542 
543 	if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) {
544 
545 		if (skb->protocol == htons(ETH_P_IP))
546 			nf_bridge->frag_max_size = IPCB(skb)->frag_max_size;
547 
548 		if (skb->protocol == htons(ETH_P_IPV6))
549 			nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size;
550 
551 		in = nf_bridge->physindev;
552 		if (nf_bridge->pkt_otherhost) {
553 			skb->pkt_type = PACKET_OTHERHOST;
554 			nf_bridge->pkt_otherhost = false;
555 		}
556 		nf_bridge_update_protocol(skb);
557 	} else {
558 		in = *((struct net_device **)(skb->cb));
559 	}
560 	nf_bridge_push_encap_header(skb);
561 
562 	NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, sk, skb,
563 		       in, skb->dev, br_forward_finish, 1);
564 	return 0;
565 }
566 
567 
568 /* This is the 'purely bridged' case.  For IP, we pass the packet to
569  * netfilter with indev and outdev set to the bridge device,
570  * but we are still able to filter on the 'real' indev/outdev
571  * because of the physdev module. For ARP, indev and outdev are the
572  * bridge ports. */
573 static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops,
574 				     struct sk_buff *skb,
575 				     const struct nf_hook_state *state)
576 {
577 	struct nf_bridge_info *nf_bridge;
578 	struct net_device *parent;
579 	u_int8_t pf;
580 
581 	if (!skb->nf_bridge)
582 		return NF_ACCEPT;
583 
584 	/* Need exclusive nf_bridge_info since we might have multiple
585 	 * different physoutdevs. */
586 	if (!nf_bridge_unshare(skb))
587 		return NF_DROP;
588 
589 	nf_bridge = nf_bridge_info_get(skb);
590 	if (!nf_bridge)
591 		return NF_DROP;
592 
593 	parent = bridge_parent(state->out);
594 	if (!parent)
595 		return NF_DROP;
596 
597 	if (IS_IP(skb) || IS_VLAN_IP(skb) || IS_PPPOE_IP(skb))
598 		pf = NFPROTO_IPV4;
599 	else if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb))
600 		pf = NFPROTO_IPV6;
601 	else
602 		return NF_ACCEPT;
603 
604 	nf_bridge_pull_encap_header(skb);
605 
606 	if (skb->pkt_type == PACKET_OTHERHOST) {
607 		skb->pkt_type = PACKET_HOST;
608 		nf_bridge->pkt_otherhost = true;
609 	}
610 
611 	if (pf == NFPROTO_IPV4) {
612 		if (br_validate_ipv4(skb))
613 			return NF_DROP;
614 		IPCB(skb)->frag_max_size = nf_bridge->frag_max_size;
615 	}
616 
617 	if (pf == NFPROTO_IPV6) {
618 		if (br_validate_ipv6(skb))
619 			return NF_DROP;
620 		IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size;
621 	}
622 
623 	nf_bridge->physoutdev = skb->dev;
624 	if (pf == NFPROTO_IPV4)
625 		skb->protocol = htons(ETH_P_IP);
626 	else
627 		skb->protocol = htons(ETH_P_IPV6);
628 
629 	NF_HOOK(pf, NF_INET_FORWARD, NULL, skb,
630 		brnf_get_logical_dev(skb, state->in),
631 		parent,	br_nf_forward_finish);
632 
633 	return NF_STOLEN;
634 }
635 
636 static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops,
637 				      struct sk_buff *skb,
638 				      const struct nf_hook_state *state)
639 {
640 	struct net_bridge_port *p;
641 	struct net_bridge *br;
642 	struct net_device **d = (struct net_device **)(skb->cb);
643 
644 	p = br_port_get_rcu(state->out);
645 	if (p == NULL)
646 		return NF_ACCEPT;
647 	br = p->br;
648 
649 	if (!brnf_call_arptables && !br->nf_call_arptables)
650 		return NF_ACCEPT;
651 
652 	if (!IS_ARP(skb)) {
653 		if (!IS_VLAN_ARP(skb))
654 			return NF_ACCEPT;
655 		nf_bridge_pull_encap_header(skb);
656 	}
657 
658 	if (arp_hdr(skb)->ar_pln != 4) {
659 		if (IS_VLAN_ARP(skb))
660 			nf_bridge_push_encap_header(skb);
661 		return NF_ACCEPT;
662 	}
663 	*d = state->in;
664 	NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, state->sk, skb,
665 		state->in, state->out, br_nf_forward_finish);
666 
667 	return NF_STOLEN;
668 }
669 
670 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
671 static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb)
672 {
673 	struct brnf_frag_data *data;
674 	int err;
675 
676 	data = this_cpu_ptr(&brnf_frag_data_storage);
677 	err = skb_cow_head(skb, data->size);
678 
679 	if (err) {
680 		kfree_skb(skb);
681 		return 0;
682 	}
683 
684 	if (data->vlan_tci) {
685 		skb->vlan_tci = data->vlan_tci;
686 		skb->vlan_proto = data->vlan_proto;
687 	}
688 
689 	skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size);
690 	__skb_push(skb, data->encap_size);
691 
692 	nf_bridge_info_free(skb);
693 	return br_dev_queue_push_xmit(sk, skb);
694 }
695 #endif
696 
697 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
698 static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb,
699 			     int (*output)(struct sock *, struct sk_buff *))
700 {
701 	unsigned int mtu = ip_skb_dst_mtu(skb);
702 	struct iphdr *iph = ip_hdr(skb);
703 	struct rtable *rt = skb_rtable(skb);
704 	struct net_device *dev = rt->dst.dev;
705 
706 	if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) ||
707 		     (IPCB(skb)->frag_max_size &&
708 		      IPCB(skb)->frag_max_size > mtu))) {
709 		IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
710 		kfree_skb(skb);
711 		return -EMSGSIZE;
712 	}
713 
714 	return ip_do_fragment(sk, skb, output);
715 }
716 #endif
717 
718 static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
719 {
720 	if (skb->nf_bridge->orig_proto == BRNF_PROTO_PPPOE)
721 		return PPPOE_SES_HLEN;
722 	return 0;
723 }
724 
725 static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb)
726 {
727 	struct nf_bridge_info *nf_bridge;
728 	unsigned int mtu_reserved;
729 
730 	mtu_reserved = nf_bridge_mtu_reduction(skb);
731 
732 	if (skb_is_gso(skb) || skb->len + mtu_reserved <= skb->dev->mtu) {
733 		nf_bridge_info_free(skb);
734 		return br_dev_queue_push_xmit(sk, skb);
735 	}
736 
737 	nf_bridge = nf_bridge_info_get(skb);
738 
739 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
740 	/* This is wrong! We should preserve the original fragment
741 	 * boundaries by preserving frag_list rather than refragmenting.
742 	 */
743 	if (skb->protocol == htons(ETH_P_IP)) {
744 		struct brnf_frag_data *data;
745 
746 		if (br_validate_ipv4(skb))
747 			return NF_DROP;
748 
749 		IPCB(skb)->frag_max_size = nf_bridge->frag_max_size;
750 
751 		nf_bridge_update_protocol(skb);
752 
753 		data = this_cpu_ptr(&brnf_frag_data_storage);
754 
755 		data->vlan_tci = skb->vlan_tci;
756 		data->vlan_proto = skb->vlan_proto;
757 		data->encap_size = nf_bridge_encap_header_len(skb);
758 		data->size = ETH_HLEN + data->encap_size;
759 
760 		skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
761 						 data->size);
762 
763 		return br_nf_ip_fragment(sk, skb, br_nf_push_frag_xmit);
764 	}
765 #endif
766 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
767 	if (skb->protocol == htons(ETH_P_IPV6)) {
768 		const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
769 		struct brnf_frag_data *data;
770 
771 		if (br_validate_ipv6(skb))
772 			return NF_DROP;
773 
774 		IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size;
775 
776 		nf_bridge_update_protocol(skb);
777 
778 		data = this_cpu_ptr(&brnf_frag_data_storage);
779 		data->encap_size = nf_bridge_encap_header_len(skb);
780 		data->size = ETH_HLEN + data->encap_size;
781 
782 		skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
783 						 data->size);
784 
785 		if (v6ops)
786 			return v6ops->fragment(sk, skb, br_nf_push_frag_xmit);
787 		else
788 			return -EMSGSIZE;
789 	}
790 #endif
791 	nf_bridge_info_free(skb);
792 	return br_dev_queue_push_xmit(sk, skb);
793 }
794 
795 /* PF_BRIDGE/POST_ROUTING ********************************************/
796 static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops,
797 				       struct sk_buff *skb,
798 				       const struct nf_hook_state *state)
799 {
800 	struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
801 	struct net_device *realoutdev = bridge_parent(skb->dev);
802 	u_int8_t pf;
803 
804 	/* if nf_bridge is set, but ->physoutdev is NULL, this packet came in
805 	 * on a bridge, but was delivered locally and is now being routed:
806 	 *
807 	 * POST_ROUTING was already invoked from the ip stack.
808 	 */
809 	if (!nf_bridge || !nf_bridge->physoutdev)
810 		return NF_ACCEPT;
811 
812 	if (!realoutdev)
813 		return NF_DROP;
814 
815 	if (IS_IP(skb) || IS_VLAN_IP(skb) || IS_PPPOE_IP(skb))
816 		pf = NFPROTO_IPV4;
817 	else if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb))
818 		pf = NFPROTO_IPV6;
819 	else
820 		return NF_ACCEPT;
821 
822 	/* We assume any code from br_dev_queue_push_xmit onwards doesn't care
823 	 * about the value of skb->pkt_type. */
824 	if (skb->pkt_type == PACKET_OTHERHOST) {
825 		skb->pkt_type = PACKET_HOST;
826 		nf_bridge->pkt_otherhost = true;
827 	}
828 
829 	nf_bridge_pull_encap_header(skb);
830 	if (pf == NFPROTO_IPV4)
831 		skb->protocol = htons(ETH_P_IP);
832 	else
833 		skb->protocol = htons(ETH_P_IPV6);
834 
835 	NF_HOOK(pf, NF_INET_POST_ROUTING, state->sk, skb,
836 		NULL, realoutdev,
837 		br_nf_dev_queue_xmit);
838 
839 	return NF_STOLEN;
840 }
841 
842 /* IP/SABOTAGE *****************************************************/
843 /* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING
844  * for the second time. */
845 static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops,
846 				   struct sk_buff *skb,
847 				   const struct nf_hook_state *state)
848 {
849 	if (skb->nf_bridge &&
850 	    !(skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) {
851 		return NF_STOP;
852 	}
853 
854 	return NF_ACCEPT;
855 }
856 
857 /* This is called when br_netfilter has called into iptables/netfilter,
858  * and DNAT has taken place on a bridge-forwarded packet.
859  *
860  * neigh->output has created a new MAC header, with local br0 MAC
861  * as saddr.
862  *
863  * This restores the original MAC saddr of the bridged packet
864  * before invoking bridge forward logic to transmit the packet.
865  */
866 static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
867 {
868 	struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
869 
870 	skb_pull(skb, ETH_HLEN);
871 	nf_bridge->mask &= ~BRNF_BRIDGED_DNAT;
872 
873 	BUILD_BUG_ON(sizeof(nf_bridge->neigh_header) != (ETH_HLEN - ETH_ALEN));
874 
875 	skb_copy_to_linear_data_offset(skb, -(ETH_HLEN - ETH_ALEN),
876 				       nf_bridge->neigh_header,
877 				       ETH_HLEN - ETH_ALEN);
878 	skb->dev = nf_bridge->physindev;
879 
880 	nf_bridge->physoutdev = NULL;
881 	br_handle_frame_finish(NULL, skb);
882 }
883 
884 static int br_nf_dev_xmit(struct sk_buff *skb)
885 {
886 	if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) {
887 		br_nf_pre_routing_finish_bridge_slow(skb);
888 		return 1;
889 	}
890 	return 0;
891 }
892 
/* Callback table published through nf_br_ops in br_netfilter_init() and
 * withdrawn again in br_netfilter_fini().
 */
static const struct nf_br_ops br_ops = {
	.br_dev_xmit_hook =	br_nf_dev_xmit,
};
896 
/* Exported function with an empty body.
 * NOTE(review): presumably exists only so that other modules can reference
 * a symbol of this module and thereby pull it in - confirm against callers.
 */
void br_netfilter_enable(void)
{
}
EXPORT_SYMBOL_GPL(br_netfilter_enable);
901 
/* Hook table registered as a whole by br_netfilter_init().
 *
 * For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
 * br_dev_queue_push_xmit is called afterwards */
static struct nf_hook_ops br_nf_ops[] __read_mostly = {
	{
		/* bridge PRE_ROUTING: entry point for IPv4 and IPv6 */
		.hook = br_nf_pre_routing,
		.owner = THIS_MODULE,
		.pf = NFPROTO_BRIDGE,
		.hooknum = NF_BR_PRE_ROUTING,
		.priority = NF_BR_PRI_BRNF,
	},
	{
		/* bridge LOCAL_IN: drops the fake rtable */
		.hook = br_nf_local_in,
		.owner = THIS_MODULE,
		.pf = NFPROTO_BRIDGE,
		.hooknum = NF_BR_LOCAL_IN,
		.priority = NF_BR_PRI_BRNF,
	},
	{
		/* bridge FORWARD for IPv4/IPv6; priority value is one lower
		 * than that of the ARP hook below
		 */
		.hook = br_nf_forward_ip,
		.owner = THIS_MODULE,
		.pf = NFPROTO_BRIDGE,
		.hooknum = NF_BR_FORWARD,
		.priority = NF_BR_PRI_BRNF - 1,
	},
	{
		/* bridge FORWARD for ARP */
		.hook = br_nf_forward_arp,
		.owner = THIS_MODULE,
		.pf = NFPROTO_BRIDGE,
		.hooknum = NF_BR_FORWARD,
		.priority = NF_BR_PRI_BRNF,
	},
	{
		/* bridge POST_ROUTING, see the note on priority above */
		.hook = br_nf_post_routing,
		.owner = THIS_MODULE,
		.pf = NFPROTO_BRIDGE,
		.hooknum = NF_BR_POST_ROUTING,
		.priority = NF_BR_PRI_LAST,
	},
	{
		/* IPv4 PRE_ROUTING 'sabotage' hook */
		.hook = ip_sabotage_in,
		.owner = THIS_MODULE,
		.pf = NFPROTO_IPV4,
		.hooknum = NF_INET_PRE_ROUTING,
		.priority = NF_IP_PRI_FIRST,
	},
	{
		/* IPv6 PRE_ROUTING 'sabotage' hook */
		.hook = ip_sabotage_in,
		.owner = THIS_MODULE,
		.pf = NFPROTO_IPV6,
		.hooknum = NF_INET_PRE_ROUTING,
		.priority = NF_IP6_PRI_FIRST,
	},
};
955 
956 #ifdef CONFIG_SYSCTL
957 static
958 int brnf_sysctl_call_tables(struct ctl_table *ctl, int write,
959 			    void __user *buffer, size_t *lenp, loff_t *ppos)
960 {
961 	int ret;
962 
963 	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
964 
965 	if (write && *(int *)(ctl->data))
966 		*(int *)(ctl->data) = 1;
967 	return ret;
968 }
969 
/* sysctl entries registered under "net/bridge" by br_netfilter_init().
 * Every entry is an int with mode 0644 handled by
 * brnf_sysctl_call_tables(); the table ends with an empty sentinel entry.
 */
static struct ctl_table brnf_table[] = {
	{
		/* consulted by br_nf_forward_arp() */
		.procname	= "bridge-nf-call-arptables",
		.data		= &brnf_call_arptables,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= brnf_sysctl_call_tables,
	},
	{
		/* consulted by br_nf_pre_routing() for IPv4 */
		.procname	= "bridge-nf-call-iptables",
		.data		= &brnf_call_iptables,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= brnf_sysctl_call_tables,
	},
	{
		/* consulted by br_nf_pre_routing() for IPv6 */
		.procname	= "bridge-nf-call-ip6tables",
		.data		= &brnf_call_ip6tables,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= brnf_sysctl_call_tables,
	},
	{
		/* gates the IS_VLAN_* tests */
		.procname	= "bridge-nf-filter-vlan-tagged",
		.data		= &brnf_filter_vlan_tagged,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= brnf_sysctl_call_tables,
	},
	{
		/* gates the IS_PPPOE_* tests */
		.procname	= "bridge-nf-filter-pppoe-tagged",
		.data		= &brnf_filter_pppoe_tagged,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= brnf_sysctl_call_tables,
	},
	{
		/* consulted by brnf_get_logical_dev() */
		.procname	= "bridge-nf-pass-vlan-input-dev",
		.data		= &brnf_pass_vlan_indev,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= brnf_sysctl_call_tables,
	},
	{ }
};
1015 #endif
1016 
1017 static int __init br_netfilter_init(void)
1018 {
1019 	int ret;
1020 
1021 	ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
1022 	if (ret < 0)
1023 		return ret;
1024 
1025 #ifdef CONFIG_SYSCTL
1026 	brnf_sysctl_header = register_net_sysctl(&init_net, "net/bridge", brnf_table);
1027 	if (brnf_sysctl_header == NULL) {
1028 		printk(KERN_WARNING
1029 		       "br_netfilter: can't register to sysctl.\n");
1030 		nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
1031 		return -ENOMEM;
1032 	}
1033 #endif
1034 	RCU_INIT_POINTER(nf_br_ops, &br_ops);
1035 	printk(KERN_NOTICE "Bridge firewalling registered\n");
1036 	return 0;
1037 }
1038 
/* Module exit: undo br_netfilter_init() - withdraw br_ops from nf_br_ops,
 * unregister the netfilter hooks and, with CONFIG_SYSCTL, the sysctl table.
 */
static void __exit br_netfilter_fini(void)
{
	/* stop publishing our xmit hook before tearing down the rest */
	RCU_INIT_POINTER(nf_br_ops, NULL);
	nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
#ifdef CONFIG_SYSCTL
	unregister_net_sysctl_table(brnf_sysctl_header);
#endif
}
1047 
/* Module entry/exit points */
module_init(br_netfilter_init);
module_exit(br_netfilter_fini);

/* Module metadata */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Lennert Buytenhek <buytenh@gnu.org>");
MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
MODULE_DESCRIPTION("Linux ethernet netfilter firewall bridge");
1055