1 /* 2 * Handle firewalling 3 * Linux ethernet bridge 4 * 5 * Authors: 6 * Lennert Buytenhek <buytenh@gnu.org> 7 * Bart De Schuymer <bdschuym@pandora.be> 8 * 9 * This program is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU General Public License 11 * as published by the Free Software Foundation; either version 12 * 2 of the License, or (at your option) any later version. 13 * 14 * Lennert dedicates this file to Kerstin Wurdinger. 15 */ 16 17 #include <linux/module.h> 18 #include <linux/kernel.h> 19 #include <linux/slab.h> 20 #include <linux/ip.h> 21 #include <linux/netdevice.h> 22 #include <linux/skbuff.h> 23 #include <linux/if_arp.h> 24 #include <linux/if_ether.h> 25 #include <linux/if_vlan.h> 26 #include <linux/if_pppox.h> 27 #include <linux/ppp_defs.h> 28 #include <linux/netfilter_bridge.h> 29 #include <linux/netfilter_ipv4.h> 30 #include <linux/netfilter_ipv6.h> 31 #include <linux/netfilter_arp.h> 32 #include <linux/in_route.h> 33 #include <linux/inetdevice.h> 34 35 #include <net/ip.h> 36 #include <net/ipv6.h> 37 #include <net/addrconf.h> 38 #include <net/route.h> 39 #include <net/netfilter/br_netfilter.h> 40 41 #include <asm/uaccess.h> 42 #include "br_private.h" 43 #ifdef CONFIG_SYSCTL 44 #include <linux/sysctl.h> 45 #endif 46 47 #ifdef CONFIG_SYSCTL 48 static struct ctl_table_header *brnf_sysctl_header; 49 static int brnf_call_iptables __read_mostly = 1; 50 static int brnf_call_ip6tables __read_mostly = 1; 51 static int brnf_call_arptables __read_mostly = 1; 52 static int brnf_filter_vlan_tagged __read_mostly = 0; 53 static int brnf_filter_pppoe_tagged __read_mostly = 0; 54 static int brnf_pass_vlan_indev __read_mostly = 0; 55 #else 56 #define brnf_call_iptables 1 57 #define brnf_call_ip6tables 1 58 #define brnf_call_arptables 1 59 #define brnf_filter_vlan_tagged 0 60 #define brnf_filter_pppoe_tagged 0 61 #define brnf_pass_vlan_indev 0 62 #endif 63 64 #define IS_IP(skb) \ 65 (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IP)) 66 67 #define IS_IPV6(skb) \ 68 (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IPV6)) 69 70 #define IS_ARP(skb) \ 71 (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_ARP)) 72 73 static inline __be16 vlan_proto(const struct sk_buff *skb) 74 { 75 if (skb_vlan_tag_present(skb)) 76 return skb->protocol; 77 else if (skb->protocol == htons(ETH_P_8021Q)) 78 return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; 79 else 80 return 0; 81 } 82 83 #define IS_VLAN_IP(skb) \ 84 (vlan_proto(skb) == htons(ETH_P_IP) && \ 85 brnf_filter_vlan_tagged) 86 87 #define IS_VLAN_IPV6(skb) \ 88 (vlan_proto(skb) == htons(ETH_P_IPV6) && \ 89 brnf_filter_vlan_tagged) 90 91 #define IS_VLAN_ARP(skb) \ 92 (vlan_proto(skb) == htons(ETH_P_ARP) && \ 93 brnf_filter_vlan_tagged) 94 95 static inline __be16 pppoe_proto(const struct sk_buff *skb) 96 { 97 return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN + 98 sizeof(struct pppoe_hdr))); 99 } 100 101 #define IS_PPPOE_IP(skb) \ 102 (skb->protocol == htons(ETH_P_PPP_SES) && \ 103 pppoe_proto(skb) == htons(PPP_IP) && \ 104 brnf_filter_pppoe_tagged) 105 106 #define IS_PPPOE_IPV6(skb) \ 107 (skb->protocol == htons(ETH_P_PPP_SES) && \ 108 pppoe_proto(skb) == htons(PPP_IPV6) && \ 109 brnf_filter_pppoe_tagged) 110 111 /* largest possible L2 header, see br_nf_dev_queue_xmit() */ 112 #define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN) 113 114 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) 115 struct brnf_frag_data { 116 char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH]; 117 u8 encap_size; 118 u8 size; 119 u16 vlan_tci; 120 __be16 vlan_proto; 121 }; 122 123 static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage); 124 #endif 125 126 static void nf_bridge_info_free(struct sk_buff *skb) 127 { 128 if (skb->nf_bridge) { 129 nf_bridge_put(skb->nf_bridge); 130 skb->nf_bridge = NULL; 131 } 132 } 133 134 static inline struct net_device *bridge_parent(const struct net_device *dev) 135 { 136 struct net_bridge_port *port; 137 138 port = br_port_get_rcu(dev); 139 return port ? port->br->dev : NULL; 140 } 141 142 static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb) 143 { 144 struct nf_bridge_info *nf_bridge = skb->nf_bridge; 145 146 if (atomic_read(&nf_bridge->use) > 1) { 147 struct nf_bridge_info *tmp = nf_bridge_alloc(skb); 148 149 if (tmp) { 150 memcpy(tmp, nf_bridge, sizeof(struct nf_bridge_info)); 151 atomic_set(&tmp->use, 1); 152 } 153 nf_bridge_put(nf_bridge); 154 nf_bridge = tmp; 155 } 156 return nf_bridge; 157 } 158 159 unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb) 160 { 161 switch (skb->protocol) { 162 case __cpu_to_be16(ETH_P_8021Q): 163 return VLAN_HLEN; 164 case __cpu_to_be16(ETH_P_PPP_SES): 165 return PPPOE_SES_HLEN; 166 default: 167 return 0; 168 } 169 } 170 171 static inline void nf_bridge_pull_encap_header(struct sk_buff *skb) 172 { 173 unsigned int len = nf_bridge_encap_header_len(skb); 174 175 skb_pull(skb, len); 176 skb->network_header += len; 177 } 178 179 static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb) 180 { 181 unsigned int len = nf_bridge_encap_header_len(skb); 182 183 skb_pull_rcsum(skb, len); 184 skb->network_header += len; 185 } 186 187 /* When handing a packet over to the IP layer 188 * check whether we have a skb that is in the 189 * expected format 190 */ 191 192 static int br_validate_ipv4(struct sk_buff *skb) 193 { 194 const struct iphdr *iph; 195 struct net_device *dev = skb->dev; 196 u32 len; 197 198 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 199 goto inhdr_error; 200 201 iph = ip_hdr(skb); 202 203 /* Basic sanity checks */ 204 if (iph->ihl < 5 || iph->version != 4) 205 goto inhdr_error; 206 207 if (!pskb_may_pull(skb, iph->ihl*4)) 208 goto inhdr_error; 209 210 iph = ip_hdr(skb); 211 if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) 212 goto inhdr_error; 213 214 len = ntohs(iph->tot_len); 215 if (skb->len < len) { 216 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INTRUNCATEDPKTS); 217 goto drop; 218 } else if (len < (iph->ihl*4)) 219 goto inhdr_error; 220 221 if (pskb_trim_rcsum(skb, len)) { 222 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS); 223 goto drop; 224 } 225 226 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); 227 /* We should really parse IP options here but until 228 * somebody who actually uses IP options complains to 229 * us we'll just silently ignore the options because 230 * we're lazy! 231 */ 232 return 0; 233 234 inhdr_error: 235 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS); 236 drop: 237 return -1; 238 } 239 240 void nf_bridge_update_protocol(struct sk_buff *skb) 241 { 242 switch (skb->nf_bridge->orig_proto) { 243 case BRNF_PROTO_8021Q: 244 skb->protocol = htons(ETH_P_8021Q); 245 break; 246 case BRNF_PROTO_PPPOE: 247 skb->protocol = htons(ETH_P_PPP_SES); 248 break; 249 case BRNF_PROTO_UNCHANGED: 250 break; 251 } 252 } 253 254 /* Obtain the correct destination MAC address, while preserving the original 255 * source MAC address. If we already know this address, we just copy it. If we 256 * don't, we use the neighbour framework to find out. In both cases, we make 257 * sure that br_handle_frame_finish() is called afterwards. 258 */ 259 int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb) 260 { 261 struct neighbour *neigh; 262 struct dst_entry *dst; 263 264 skb->dev = bridge_parent(skb->dev); 265 if (!skb->dev) 266 goto free_skb; 267 dst = skb_dst(skb); 268 neigh = dst_neigh_lookup_skb(dst, skb); 269 if (neigh) { 270 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 271 int ret; 272 273 if (neigh->hh.hh_len) { 274 neigh_hh_bridge(&neigh->hh, skb); 275 skb->dev = nf_bridge->physindev; 276 ret = br_handle_frame_finish(sk, skb); 277 } else { 278 /* the neighbour function below overwrites the complete 279 * MAC header, so we save the Ethernet source address and 280 * protocol number. 281 */ 282 skb_copy_from_linear_data_offset(skb, 283 -(ETH_HLEN-ETH_ALEN), 284 nf_bridge->neigh_header, 285 ETH_HLEN-ETH_ALEN); 286 /* tell br_dev_xmit to continue with forwarding */ 287 nf_bridge->mask |= BRNF_BRIDGED_DNAT; 288 /* FIXME Need to refragment */ 289 ret = neigh->output(neigh, skb); 290 } 291 neigh_release(neigh); 292 return ret; 293 } 294 free_skb: 295 kfree_skb(skb); 296 return 0; 297 } 298 299 static inline bool 300 br_nf_ipv4_daddr_was_changed(const struct sk_buff *skb, 301 const struct nf_bridge_info *nf_bridge) 302 { 303 return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr; 304 } 305 306 /* This requires some explaining. If DNAT has taken place, 307 * we will need to fix up the destination Ethernet address. 308 * This is also true when SNAT takes place (for the reply direction). 309 * 310 * There are two cases to consider: 311 * 1. The packet was DNAT'ed to a device in the same bridge 312 * port group as it was received on. We can still bridge 313 * the packet. 314 * 2. The packet was DNAT'ed to a different device, either 315 * a non-bridged device or another bridge port group. 316 * The packet will need to be routed. 317 * 318 * The correct way of distinguishing between these two cases is to 319 * call ip_route_input() and to look at skb->dst->dev, which is 320 * changed to the destination device if ip_route_input() succeeds. 321 * 322 * Let's first consider the case that ip_route_input() succeeds: 323 * 324 * If the output device equals the logical bridge device the packet 325 * came in on, we can consider this bridging. The corresponding MAC 326 * address will be obtained in br_nf_pre_routing_finish_bridge. 327 * Otherwise, the packet is considered to be routed and we just 328 * change the destination MAC address so that the packet will 329 * later be passed up to the IP stack to be routed. For a redirected 330 * packet, ip_route_input() will give back the localhost as output device, 331 * which differs from the bridge device. 332 * 333 * Let's now consider the case that ip_route_input() fails: 334 * 335 * This can be because the destination address is martian, in which case 336 * the packet will be dropped. 337 * If IP forwarding is disabled, ip_route_input() will fail, while 338 * ip_route_output_key() can return success. The source 339 * address for ip_route_output_key() is set to zero, so ip_route_output_key() 340 * thinks we're handling a locally generated packet and won't care 341 * if IP forwarding is enabled. If the output device equals the logical bridge 342 * device, we proceed as if ip_route_input() succeeded. If it differs from the 343 * logical bridge port or if ip_route_output_key() fails we drop the packet. 344 */ 345 static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb) 346 { 347 struct net_device *dev = skb->dev; 348 struct iphdr *iph = ip_hdr(skb); 349 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 350 struct rtable *rt; 351 int err; 352 353 nf_bridge->frag_max_size = IPCB(skb)->frag_max_size; 354 355 if (nf_bridge->pkt_otherhost) { 356 skb->pkt_type = PACKET_OTHERHOST; 357 nf_bridge->pkt_otherhost = false; 358 } 359 nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING; 360 if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) { 361 if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { 362 struct in_device *in_dev = __in_dev_get_rcu(dev); 363 364 /* If err equals -EHOSTUNREACH the error is due to a 365 * martian destination or due to the fact that 366 * forwarding is disabled. For most martian packets, 367 * ip_route_output_key() will fail. It won't fail for 2 types of 368 * martian destinations: loopback destinations and destination 369 * 0.0.0.0. In both cases the packet will be dropped because the 370 * destination is the loopback device and not the bridge. */ 371 if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev)) 372 goto free_skb; 373 374 rt = ip_route_output(dev_net(dev), iph->daddr, 0, 375 RT_TOS(iph->tos), 0); 376 if (!IS_ERR(rt)) { 377 /* - Bridged-and-DNAT'ed traffic doesn't 378 * require ip_forwarding. */ 379 if (rt->dst.dev == dev) { 380 skb_dst_set(skb, &rt->dst); 381 goto bridged_dnat; 382 } 383 ip_rt_put(rt); 384 } 385 free_skb: 386 kfree_skb(skb); 387 return 0; 388 } else { 389 if (skb_dst(skb)->dev == dev) { 390 bridged_dnat: 391 skb->dev = nf_bridge->physindev; 392 nf_bridge_update_protocol(skb); 393 nf_bridge_push_encap_header(skb); 394 NF_HOOK_THRESH(NFPROTO_BRIDGE, 395 NF_BR_PRE_ROUTING, 396 sk, skb, skb->dev, NULL, 397 br_nf_pre_routing_finish_bridge, 398 1); 399 return 0; 400 } 401 ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); 402 skb->pkt_type = PACKET_HOST; 403 } 404 } else { 405 rt = bridge_parent_rtable(nf_bridge->physindev); 406 if (!rt) { 407 kfree_skb(skb); 408 return 0; 409 } 410 skb_dst_set_noref(skb, &rt->dst); 411 } 412 413 skb->dev = nf_bridge->physindev; 414 nf_bridge_update_protocol(skb); 415 nf_bridge_push_encap_header(skb); 416 NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb, 417 skb->dev, NULL, 418 br_handle_frame_finish, 1); 419 420 return 0; 421 } 422 423 static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct net_device *dev) 424 { 425 struct net_device *vlan, *br; 426 427 br = bridge_parent(dev); 428 if (brnf_pass_vlan_indev == 0 || !skb_vlan_tag_present(skb)) 429 return br; 430 431 vlan = __vlan_find_dev_deep_rcu(br, skb->vlan_proto, 432 skb_vlan_tag_get(skb) & VLAN_VID_MASK); 433 434 return vlan ? vlan : br; 435 } 436 437 /* Some common code for IPv4/IPv6 */ 438 struct net_device *setup_pre_routing(struct sk_buff *skb) 439 { 440 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 441 442 if (skb->pkt_type == PACKET_OTHERHOST) { 443 skb->pkt_type = PACKET_HOST; 444 nf_bridge->pkt_otherhost = true; 445 } 446 447 nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING; 448 nf_bridge->physindev = skb->dev; 449 skb->dev = brnf_get_logical_dev(skb, skb->dev); 450 451 if (skb->protocol == htons(ETH_P_8021Q)) 452 nf_bridge->orig_proto = BRNF_PROTO_8021Q; 453 else if (skb->protocol == htons(ETH_P_PPP_SES)) 454 nf_bridge->orig_proto = BRNF_PROTO_PPPOE; 455 456 /* Must drop socket now because of tproxy. */ 457 skb_orphan(skb); 458 return skb->dev; 459 } 460 461 /* Direct IPv6 traffic to br_nf_pre_routing_ipv6. 462 * Replicate the checks that IPv4 does on packet reception. 463 * Set skb->dev to the bridge device (i.e. parent of the 464 * receiving device) to make netfilter happy, the REDIRECT 465 * target in particular. Save the original destination IP 466 * address to be able to detect DNAT afterwards. */ 467 static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, 468 struct sk_buff *skb, 469 const struct nf_hook_state *state) 470 { 471 struct nf_bridge_info *nf_bridge; 472 struct net_bridge_port *p; 473 struct net_bridge *br; 474 __u32 len = nf_bridge_encap_header_len(skb); 475 476 if (unlikely(!pskb_may_pull(skb, len))) 477 return NF_DROP; 478 479 p = br_port_get_rcu(state->in); 480 if (p == NULL) 481 return NF_DROP; 482 br = p->br; 483 484 if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb)) { 485 if (!brnf_call_ip6tables && !br->nf_call_ip6tables) 486 return NF_ACCEPT; 487 488 nf_bridge_pull_encap_header_rcsum(skb); 489 return br_nf_pre_routing_ipv6(ops, skb, state); 490 } 491 492 if (!brnf_call_iptables && !br->nf_call_iptables) 493 return NF_ACCEPT; 494 495 if (!IS_IP(skb) && !IS_VLAN_IP(skb) && !IS_PPPOE_IP(skb)) 496 return NF_ACCEPT; 497 498 nf_bridge_pull_encap_header_rcsum(skb); 499 500 if (br_validate_ipv4(skb)) 501 return NF_DROP; 502 503 nf_bridge_put(skb->nf_bridge); 504 if (!nf_bridge_alloc(skb)) 505 return NF_DROP; 506 if (!setup_pre_routing(skb)) 507 return NF_DROP; 508 509 nf_bridge = nf_bridge_info_get(skb); 510 nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr; 511 512 skb->protocol = htons(ETH_P_IP); 513 514 NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->sk, skb, 515 skb->dev, NULL, 516 br_nf_pre_routing_finish); 517 518 return NF_STOLEN; 519 } 520 521 522 /* PF_BRIDGE/LOCAL_IN ************************************************/ 523 /* The packet is locally destined, which requires a real 524 * dst_entry, so detach the fake one. On the way up, the 525 * packet would pass through PRE_ROUTING again (which already 526 * took place when the packet entered the bridge), but we 527 * register an IPv4 PRE_ROUTING 'sabotage' hook that will 528 * prevent this from happening. */ 529 static unsigned int br_nf_local_in(const struct nf_hook_ops *ops, 530 struct sk_buff *skb, 531 const struct nf_hook_state *state) 532 { 533 br_drop_fake_rtable(skb); 534 return NF_ACCEPT; 535 } 536 537 /* PF_BRIDGE/FORWARD *************************************************/ 538 static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb) 539 { 540 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 541 struct net_device *in; 542 543 if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) { 544 545 if (skb->protocol == htons(ETH_P_IP)) 546 nf_bridge->frag_max_size = IPCB(skb)->frag_max_size; 547 548 if (skb->protocol == htons(ETH_P_IPV6)) 549 nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size; 550 551 in = nf_bridge->physindev; 552 if (nf_bridge->pkt_otherhost) { 553 skb->pkt_type = PACKET_OTHERHOST; 554 nf_bridge->pkt_otherhost = false; 555 } 556 nf_bridge_update_protocol(skb); 557 } else { 558 in = *((struct net_device **)(skb->cb)); 559 } 560 nf_bridge_push_encap_header(skb); 561 562 NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, sk, skb, 563 in, skb->dev, br_forward_finish, 1); 564 return 0; 565 } 566 567 568 /* This is the 'purely bridged' case. For IP, we pass the packet to 569 * netfilter with indev and outdev set to the bridge device, 570 * but we are still able to filter on the 'real' indev/outdev 571 * because of the physdev module. For ARP, indev and outdev are the 572 * bridge ports. */ 573 static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, 574 struct sk_buff *skb, 575 const struct nf_hook_state *state) 576 { 577 struct nf_bridge_info *nf_bridge; 578 struct net_device *parent; 579 u_int8_t pf; 580 581 if (!skb->nf_bridge) 582 return NF_ACCEPT; 583 584 /* Need exclusive nf_bridge_info since we might have multiple 585 * different physoutdevs. */ 586 if (!nf_bridge_unshare(skb)) 587 return NF_DROP; 588 589 nf_bridge = nf_bridge_info_get(skb); 590 if (!nf_bridge) 591 return NF_DROP; 592 593 parent = bridge_parent(state->out); 594 if (!parent) 595 return NF_DROP; 596 597 if (IS_IP(skb) || IS_VLAN_IP(skb) || IS_PPPOE_IP(skb)) 598 pf = NFPROTO_IPV4; 599 else if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb)) 600 pf = NFPROTO_IPV6; 601 else 602 return NF_ACCEPT; 603 604 nf_bridge_pull_encap_header(skb); 605 606 if (skb->pkt_type == PACKET_OTHERHOST) { 607 skb->pkt_type = PACKET_HOST; 608 nf_bridge->pkt_otherhost = true; 609 } 610 611 if (pf == NFPROTO_IPV4) { 612 if (br_validate_ipv4(skb)) 613 return NF_DROP; 614 IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; 615 } 616 617 if (pf == NFPROTO_IPV6) { 618 if (br_validate_ipv6(skb)) 619 return NF_DROP; 620 IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; 621 } 622 623 nf_bridge->physoutdev = skb->dev; 624 if (pf == NFPROTO_IPV4) 625 skb->protocol = htons(ETH_P_IP); 626 else 627 skb->protocol = htons(ETH_P_IPV6); 628 629 NF_HOOK(pf, NF_INET_FORWARD, NULL, skb, 630 brnf_get_logical_dev(skb, state->in), 631 parent, br_nf_forward_finish); 632 633 return NF_STOLEN; 634 } 635 636 static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, 637 struct sk_buff *skb, 638 const struct nf_hook_state *state) 639 { 640 struct net_bridge_port *p; 641 struct net_bridge *br; 642 struct net_device **d = (struct net_device **)(skb->cb); 643 644 p = br_port_get_rcu(state->out); 645 if (p == NULL) 646 return NF_ACCEPT; 647 br = p->br; 648 649 if (!brnf_call_arptables && !br->nf_call_arptables) 650 return NF_ACCEPT; 651 652 if (!IS_ARP(skb)) { 653 if (!IS_VLAN_ARP(skb)) 654 return NF_ACCEPT; 655 nf_bridge_pull_encap_header(skb); 656 } 657 658 if (arp_hdr(skb)->ar_pln != 4) { 659 if (IS_VLAN_ARP(skb)) 660 nf_bridge_push_encap_header(skb); 661 return NF_ACCEPT; 662 } 663 *d = state->in; 664 NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, state->sk, skb, 665 state->in, state->out, br_nf_forward_finish); 666 667 return NF_STOLEN; 668 } 669 670 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) 671 static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb) 672 { 673 struct brnf_frag_data *data; 674 int err; 675 676 data = this_cpu_ptr(&brnf_frag_data_storage); 677 err = skb_cow_head(skb, data->size); 678 679 if (err) { 680 kfree_skb(skb); 681 return 0; 682 } 683 684 if (data->vlan_tci) { 685 skb->vlan_tci = data->vlan_tci; 686 skb->vlan_proto = data->vlan_proto; 687 } 688 689 skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size); 690 __skb_push(skb, data->encap_size); 691 692 nf_bridge_info_free(skb); 693 return br_dev_queue_push_xmit(sk, skb); 694 } 695 #endif 696 697 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) 698 static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb, 699 int (*output)(struct sock *, struct sk_buff *)) 700 { 701 unsigned int mtu = ip_skb_dst_mtu(skb); 702 struct iphdr *iph = ip_hdr(skb); 703 struct rtable *rt = skb_rtable(skb); 704 struct net_device *dev = rt->dst.dev; 705 706 if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) || 707 (IPCB(skb)->frag_max_size && 708 IPCB(skb)->frag_max_size > mtu))) { 709 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); 710 kfree_skb(skb); 711 return -EMSGSIZE; 712 } 713 714 return ip_do_fragment(sk, skb, output); 715 } 716 #endif 717 718 static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) 719 { 720 if (skb->nf_bridge->orig_proto == BRNF_PROTO_PPPOE) 721 return PPPOE_SES_HLEN; 722 return 0; 723 } 724 725 static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) 726 { 727 struct nf_bridge_info *nf_bridge; 728 unsigned int mtu_reserved; 729 730 mtu_reserved = nf_bridge_mtu_reduction(skb); 731 732 if (skb_is_gso(skb) || skb->len + mtu_reserved <= skb->dev->mtu) { 733 nf_bridge_info_free(skb); 734 return br_dev_queue_push_xmit(sk, skb); 735 } 736 737 nf_bridge = nf_bridge_info_get(skb); 738 739 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) 740 /* This is wrong! We should preserve the original fragment 741 * boundaries by preserving frag_list rather than refragmenting. 742 */ 743 if (skb->protocol == htons(ETH_P_IP)) { 744 struct brnf_frag_data *data; 745 746 if (br_validate_ipv4(skb)) 747 return NF_DROP; 748 749 IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; 750 751 nf_bridge_update_protocol(skb); 752 753 data = this_cpu_ptr(&brnf_frag_data_storage); 754 755 data->vlan_tci = skb->vlan_tci; 756 data->vlan_proto = skb->vlan_proto; 757 data->encap_size = nf_bridge_encap_header_len(skb); 758 data->size = ETH_HLEN + data->encap_size; 759 760 skb_copy_from_linear_data_offset(skb, -data->size, data->mac, 761 data->size); 762 763 return br_nf_ip_fragment(sk, skb, br_nf_push_frag_xmit); 764 } 765 #endif 766 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) 767 if (skb->protocol == htons(ETH_P_IPV6)) { 768 const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); 769 struct brnf_frag_data *data; 770 771 if (br_validate_ipv6(skb)) 772 return NF_DROP; 773 774 IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; 775 776 nf_bridge_update_protocol(skb); 777 778 data = this_cpu_ptr(&brnf_frag_data_storage); 779 data->encap_size = nf_bridge_encap_header_len(skb); 780 data->size = ETH_HLEN + data->encap_size; 781 782 skb_copy_from_linear_data_offset(skb, -data->size, data->mac, 783 data->size); 784 785 if (v6ops) 786 return v6ops->fragment(sk, skb, br_nf_push_frag_xmit); 787 else 788 return -EMSGSIZE; 789 } 790 #endif 791 nf_bridge_info_free(skb); 792 return br_dev_queue_push_xmit(sk, skb); 793 } 794 795 /* PF_BRIDGE/POST_ROUTING ********************************************/ 796 static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, 797 struct sk_buff *skb, 798 const struct nf_hook_state *state) 799 { 800 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 801 struct net_device *realoutdev = bridge_parent(skb->dev); 802 u_int8_t pf; 803 804 /* if nf_bridge is set, but ->physoutdev is NULL, this packet came in 805 * on a bridge, but was delivered locally and is now being routed: 806 * 807 * POST_ROUTING was already invoked from the ip stack. 808 */ 809 if (!nf_bridge || !nf_bridge->physoutdev) 810 return NF_ACCEPT; 811 812 if (!realoutdev) 813 return NF_DROP; 814 815 if (IS_IP(skb) || IS_VLAN_IP(skb) || IS_PPPOE_IP(skb)) 816 pf = NFPROTO_IPV4; 817 else if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb)) 818 pf = NFPROTO_IPV6; 819 else 820 return NF_ACCEPT; 821 822 /* We assume any code from br_dev_queue_push_xmit onwards doesn't care 823 * about the value of skb->pkt_type. */ 824 if (skb->pkt_type == PACKET_OTHERHOST) { 825 skb->pkt_type = PACKET_HOST; 826 nf_bridge->pkt_otherhost = true; 827 } 828 829 nf_bridge_pull_encap_header(skb); 830 if (pf == NFPROTO_IPV4) 831 skb->protocol = htons(ETH_P_IP); 832 else 833 skb->protocol = htons(ETH_P_IPV6); 834 835 NF_HOOK(pf, NF_INET_POST_ROUTING, state->sk, skb, 836 NULL, realoutdev, 837 br_nf_dev_queue_xmit); 838 839 return NF_STOLEN; 840 } 841 842 /* IP/SABOTAGE *****************************************************/ 843 /* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING 844 * for the second time. */ 845 static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops, 846 struct sk_buff *skb, 847 const struct nf_hook_state *state) 848 { 849 if (skb->nf_bridge && 850 !(skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) { 851 return NF_STOP; 852 } 853 854 return NF_ACCEPT; 855 } 856 857 /* This is called when br_netfilter has called into iptables/netfilter, 858 * and DNAT has taken place on a bridge-forwarded packet. 859 * 860 * neigh->output has created a new MAC header, with local br0 MAC 861 * as saddr. 862 * 863 * This restores the original MAC saddr of the bridged packet 864 * before invoking bridge forward logic to transmit the packet. 865 */ 866 static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) 867 { 868 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 869 870 skb_pull(skb, ETH_HLEN); 871 nf_bridge->mask &= ~BRNF_BRIDGED_DNAT; 872 873 BUILD_BUG_ON(sizeof(nf_bridge->neigh_header) != (ETH_HLEN - ETH_ALEN)); 874 875 skb_copy_to_linear_data_offset(skb, -(ETH_HLEN - ETH_ALEN), 876 nf_bridge->neigh_header, 877 ETH_HLEN - ETH_ALEN); 878 skb->dev = nf_bridge->physindev; 879 880 nf_bridge->physoutdev = NULL; 881 br_handle_frame_finish(NULL, skb); 882 } 883 884 static int br_nf_dev_xmit(struct sk_buff *skb) 885 { 886 if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) { 887 br_nf_pre_routing_finish_bridge_slow(skb); 888 return 1; 889 } 890 return 0; 891 } 892 893 static const struct nf_br_ops br_ops = { 894 .br_dev_xmit_hook = br_nf_dev_xmit, 895 }; 896 897 void br_netfilter_enable(void) 898 { 899 } 900 EXPORT_SYMBOL_GPL(br_netfilter_enable); 901 902 /* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because 903 * br_dev_queue_push_xmit is called afterwards */ 904 static struct nf_hook_ops br_nf_ops[] __read_mostly = { 905 { 906 .hook = br_nf_pre_routing, 907 .owner = THIS_MODULE, 908 .pf = NFPROTO_BRIDGE, 909 .hooknum = NF_BR_PRE_ROUTING, 910 .priority = NF_BR_PRI_BRNF, 911 }, 912 { 913 .hook = br_nf_local_in, 914 .owner = THIS_MODULE, 915 .pf = NFPROTO_BRIDGE, 916 .hooknum = NF_BR_LOCAL_IN, 917 .priority = NF_BR_PRI_BRNF, 918 }, 919 { 920 .hook = br_nf_forward_ip, 921 .owner = THIS_MODULE, 922 .pf = NFPROTO_BRIDGE, 923 .hooknum = NF_BR_FORWARD, 924 .priority = NF_BR_PRI_BRNF - 1, 925 }, 926 { 927 .hook = br_nf_forward_arp, 928 .owner = THIS_MODULE, 929 .pf = NFPROTO_BRIDGE, 930 .hooknum = NF_BR_FORWARD, 931 .priority = NF_BR_PRI_BRNF, 932 }, 933 { 934 .hook = br_nf_post_routing, 935 .owner = THIS_MODULE, 936 .pf = NFPROTO_BRIDGE, 937 .hooknum = NF_BR_POST_ROUTING, 938 .priority = NF_BR_PRI_LAST, 939 }, 940 { 941 .hook = ip_sabotage_in, 942 .owner = THIS_MODULE, 943 .pf = NFPROTO_IPV4, 944 .hooknum = NF_INET_PRE_ROUTING, 945 .priority = NF_IP_PRI_FIRST, 946 }, 947 { 948 .hook = ip_sabotage_in, 949 .owner = THIS_MODULE, 950 .pf = NFPROTO_IPV6, 951 .hooknum = NF_INET_PRE_ROUTING, 952 .priority = NF_IP6_PRI_FIRST, 953 }, 954 }; 955 956 #ifdef CONFIG_SYSCTL 957 static 958 int brnf_sysctl_call_tables(struct ctl_table *ctl, int write, 959 void __user *buffer, size_t *lenp, loff_t *ppos) 960 { 961 int ret; 962 963 ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 964 965 if (write && *(int *)(ctl->data)) 966 *(int *)(ctl->data) = 1; 967 return ret; 968 } 969 970 static struct ctl_table brnf_table[] = { 971 { 972 .procname = "bridge-nf-call-arptables", 973 .data = &brnf_call_arptables, 974 .maxlen = sizeof(int), 975 .mode = 0644, 976 .proc_handler = brnf_sysctl_call_tables, 977 }, 978 { 979 .procname = "bridge-nf-call-iptables", 980 .data = &brnf_call_iptables, 981 .maxlen = sizeof(int), 982 .mode = 0644, 983 .proc_handler = brnf_sysctl_call_tables, 984 }, 985 { 986 .procname = "bridge-nf-call-ip6tables", 987 .data = &brnf_call_ip6tables, 988 .maxlen = sizeof(int), 989 .mode = 0644, 990 .proc_handler = brnf_sysctl_call_tables, 991 }, 992 { 993 .procname = "bridge-nf-filter-vlan-tagged", 994 .data = &brnf_filter_vlan_tagged, 995 .maxlen = sizeof(int), 996 .mode = 0644, 997 .proc_handler = brnf_sysctl_call_tables, 998 }, 999 { 1000 .procname = "bridge-nf-filter-pppoe-tagged", 1001 .data = &brnf_filter_pppoe_tagged, 1002 .maxlen = sizeof(int), 1003 .mode = 0644, 1004 .proc_handler = brnf_sysctl_call_tables, 1005 }, 1006 { 1007 .procname = "bridge-nf-pass-vlan-input-dev", 1008 .data = &brnf_pass_vlan_indev, 1009 .maxlen = sizeof(int), 1010 .mode = 0644, 1011 .proc_handler = brnf_sysctl_call_tables, 1012 }, 1013 { } 1014 }; 1015 #endif 1016 1017 static int __init br_netfilter_init(void) 1018 { 1019 int ret; 1020 1021 ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); 1022 if (ret < 0) 1023 return ret; 1024 1025 #ifdef CONFIG_SYSCTL 1026 brnf_sysctl_header = register_net_sysctl(&init_net, "net/bridge", brnf_table); 1027 if (brnf_sysctl_header == NULL) { 1028 printk(KERN_WARNING 1029 "br_netfilter: can't register to sysctl.\n"); 1030 nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); 1031 return -ENOMEM; 1032 } 1033 #endif 1034 RCU_INIT_POINTER(nf_br_ops, &br_ops); 1035 printk(KERN_NOTICE "Bridge firewalling registered\n"); 1036 return 0; 1037 } 1038 1039 static void __exit br_netfilter_fini(void) 1040 { 1041 RCU_INIT_POINTER(nf_br_ops, NULL); 1042 nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); 1043 #ifdef CONFIG_SYSCTL 1044 unregister_net_sysctl_table(brnf_sysctl_header); 1045 #endif 1046 } 1047 1048 module_init(br_netfilter_init); 1049 module_exit(br_netfilter_fini); 1050 1051 MODULE_LICENSE("GPL"); 1052 MODULE_AUTHOR("Lennert Buytenhek <buytenh@gnu.org>"); 1053 MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>"); 1054 MODULE_DESCRIPTION("Linux ethernet netfilter firewall bridge"); 1055