1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Handle firewalling 4 * Linux ethernet bridge 5 * 6 * Authors: 7 * Lennert Buytenhek <buytenh@gnu.org> 8 * Bart De Schuymer <bdschuym@pandora.be> 9 * 10 * Lennert dedicates this file to Kerstin Wurdinger. 11 */ 12 13 #include <linux/module.h> 14 #include <linux/kernel.h> 15 #include <linux/slab.h> 16 #include <linux/ip.h> 17 #include <linux/netdevice.h> 18 #include <linux/skbuff.h> 19 #include <linux/if_arp.h> 20 #include <linux/if_ether.h> 21 #include <linux/if_vlan.h> 22 #include <linux/if_pppox.h> 23 #include <linux/ppp_defs.h> 24 #include <linux/netfilter_bridge.h> 25 #include <uapi/linux/netfilter_bridge.h> 26 #include <linux/netfilter_ipv4.h> 27 #include <linux/netfilter_ipv6.h> 28 #include <linux/netfilter_arp.h> 29 #include <linux/in_route.h> 30 #include <linux/rculist.h> 31 #include <linux/inetdevice.h> 32 33 #include <net/ip.h> 34 #include <net/ipv6.h> 35 #include <net/addrconf.h> 36 #include <net/route.h> 37 #include <net/netfilter/br_netfilter.h> 38 #include <net/netns/generic.h> 39 40 #include <linux/uaccess.h> 41 #include "br_private.h" 42 #ifdef CONFIG_SYSCTL 43 #include <linux/sysctl.h> 44 #endif 45 46 static unsigned int brnf_net_id __read_mostly; 47 48 struct brnf_net { 49 bool enabled; 50 51 #ifdef CONFIG_SYSCTL 52 struct ctl_table_header *ctl_hdr; 53 #endif 54 55 /* default value is 1 */ 56 int call_iptables; 57 int call_ip6tables; 58 int call_arptables; 59 60 /* default value is 0 */ 61 int filter_vlan_tagged; 62 int filter_pppoe_tagged; 63 int pass_vlan_indev; 64 }; 65 66 #define IS_IP(skb) \ 67 (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IP)) 68 69 #define IS_IPV6(skb) \ 70 (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IPV6)) 71 72 #define IS_ARP(skb) \ 73 (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_ARP)) 74 75 static inline __be16 vlan_proto(const struct sk_buff *skb) 76 { 77 if (skb_vlan_tag_present(skb)) 78 return skb->protocol; 79 else if (skb->protocol == htons(ETH_P_8021Q)) 80 return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; 81 else 82 return 0; 83 } 84 85 static inline bool is_vlan_ip(const struct sk_buff *skb, const struct net *net) 86 { 87 struct brnf_net *brnet = net_generic(net, brnf_net_id); 88 89 return vlan_proto(skb) == htons(ETH_P_IP) && brnet->filter_vlan_tagged; 90 } 91 92 static inline bool is_vlan_ipv6(const struct sk_buff *skb, 93 const struct net *net) 94 { 95 struct brnf_net *brnet = net_generic(net, brnf_net_id); 96 97 return vlan_proto(skb) == htons(ETH_P_IPV6) && 98 brnet->filter_vlan_tagged; 99 } 100 101 static inline bool is_vlan_arp(const struct sk_buff *skb, const struct net *net) 102 { 103 struct brnf_net *brnet = net_generic(net, brnf_net_id); 104 105 return vlan_proto(skb) == htons(ETH_P_ARP) && brnet->filter_vlan_tagged; 106 } 107 108 static inline __be16 pppoe_proto(const struct sk_buff *skb) 109 { 110 return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN + 111 sizeof(struct pppoe_hdr))); 112 } 113 114 static inline bool is_pppoe_ip(const struct sk_buff *skb, const struct net *net) 115 { 116 struct brnf_net *brnet = net_generic(net, brnf_net_id); 117 118 return skb->protocol == htons(ETH_P_PPP_SES) && 119 pppoe_proto(skb) == htons(PPP_IP) && brnet->filter_pppoe_tagged; 120 } 121 122 static inline bool is_pppoe_ipv6(const struct sk_buff *skb, 123 const struct net *net) 124 { 125 struct brnf_net *brnet = net_generic(net, brnf_net_id); 126 127 return skb->protocol == htons(ETH_P_PPP_SES) && 128 pppoe_proto(skb) == htons(PPP_IPV6) && 129 brnet->filter_pppoe_tagged; 130 } 131 132 /* largest possible L2 header, see br_nf_dev_queue_xmit() */ 133 #define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN) 134 135 struct brnf_frag_data { 136 char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH]; 137 u8 encap_size; 138 u8 size; 139 u16 vlan_tci; 140 __be16 vlan_proto; 141 }; 142 143 static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage); 144 145 static void nf_bridge_info_free(struct sk_buff *skb) 146 { 147 skb_ext_del(skb, SKB_EXT_BRIDGE_NF); 148 } 149 150 static inline struct net_device *bridge_parent(const struct net_device *dev) 151 { 152 struct net_bridge_port *port; 153 154 port = br_port_get_rcu(dev); 155 return port ? port->br->dev : NULL; 156 } 157 158 static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb) 159 { 160 return skb_ext_add(skb, SKB_EXT_BRIDGE_NF); 161 } 162 163 unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb) 164 { 165 switch (skb->protocol) { 166 case __cpu_to_be16(ETH_P_8021Q): 167 return VLAN_HLEN; 168 case __cpu_to_be16(ETH_P_PPP_SES): 169 return PPPOE_SES_HLEN; 170 default: 171 return 0; 172 } 173 } 174 175 static inline void nf_bridge_pull_encap_header(struct sk_buff *skb) 176 { 177 unsigned int len = nf_bridge_encap_header_len(skb); 178 179 skb_pull(skb, len); 180 skb->network_header += len; 181 } 182 183 static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb) 184 { 185 unsigned int len = nf_bridge_encap_header_len(skb); 186 187 skb_pull_rcsum(skb, len); 188 skb->network_header += len; 189 } 190 191 /* When handing a packet over to the IP layer 192 * check whether we have a skb that is in the 193 * expected format 194 */ 195 196 static int br_validate_ipv4(struct net *net, struct sk_buff *skb) 197 { 198 const struct iphdr *iph; 199 u32 len; 200 201 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 202 goto inhdr_error; 203 204 iph = ip_hdr(skb); 205 206 /* Basic sanity checks */ 207 if (iph->ihl < 5 || iph->version != 4) 208 goto inhdr_error; 209 210 if (!pskb_may_pull(skb, iph->ihl*4)) 211 goto inhdr_error; 212 213 iph = ip_hdr(skb); 214 if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) 215 goto csum_error; 216 217 len = ntohs(iph->tot_len); 218 if (skb->len < len) { 219 __IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS); 220 goto drop; 221 } else if (len < (iph->ihl*4)) 222 goto inhdr_error; 223 224 if (pskb_trim_rcsum(skb, len)) { 225 __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS); 226 goto drop; 227 } 228 229 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); 230 /* We should really parse IP options here but until 231 * somebody who actually uses IP options complains to 232 * us we'll just silently ignore the options because 233 * we're lazy! 234 */ 235 return 0; 236 237 csum_error: 238 __IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS); 239 inhdr_error: 240 __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); 241 drop: 242 return -1; 243 } 244 245 void nf_bridge_update_protocol(struct sk_buff *skb) 246 { 247 const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 248 249 switch (nf_bridge->orig_proto) { 250 case BRNF_PROTO_8021Q: 251 skb->protocol = htons(ETH_P_8021Q); 252 break; 253 case BRNF_PROTO_PPPOE: 254 skb->protocol = htons(ETH_P_PPP_SES); 255 break; 256 case BRNF_PROTO_UNCHANGED: 257 break; 258 } 259 } 260 261 /* Obtain the correct destination MAC address, while preserving the original 262 * source MAC address. If we already know this address, we just copy it. If we 263 * don't, we use the neighbour framework to find out. In both cases, we make 264 * sure that br_handle_frame_finish() is called afterwards. 265 */ 266 int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_buff *skb) 267 { 268 struct neighbour *neigh; 269 struct dst_entry *dst; 270 271 skb->dev = bridge_parent(skb->dev); 272 if (!skb->dev) 273 goto free_skb; 274 dst = skb_dst(skb); 275 neigh = dst_neigh_lookup_skb(dst, skb); 276 if (neigh) { 277 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 278 int ret; 279 280 if ((neigh->nud_state & NUD_CONNECTED) && neigh->hh.hh_len) { 281 neigh_hh_bridge(&neigh->hh, skb); 282 skb->dev = nf_bridge->physindev; 283 ret = br_handle_frame_finish(net, sk, skb); 284 } else { 285 /* the neighbour function below overwrites the complete 286 * MAC header, so we save the Ethernet source address and 287 * protocol number. 288 */ 289 skb_copy_from_linear_data_offset(skb, 290 -(ETH_HLEN-ETH_ALEN), 291 nf_bridge->neigh_header, 292 ETH_HLEN-ETH_ALEN); 293 /* tell br_dev_xmit to continue with forwarding */ 294 nf_bridge->bridged_dnat = 1; 295 /* FIXME Need to refragment */ 296 ret = neigh->output(neigh, skb); 297 } 298 neigh_release(neigh); 299 return ret; 300 } 301 free_skb: 302 kfree_skb(skb); 303 return 0; 304 } 305 306 static inline bool 307 br_nf_ipv4_daddr_was_changed(const struct sk_buff *skb, 308 const struct nf_bridge_info *nf_bridge) 309 { 310 return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr; 311 } 312 313 /* This requires some explaining. If DNAT has taken place, 314 * we will need to fix up the destination Ethernet address. 315 * This is also true when SNAT takes place (for the reply direction). 316 * 317 * There are two cases to consider: 318 * 1. The packet was DNAT'ed to a device in the same bridge 319 * port group as it was received on. We can still bridge 320 * the packet. 321 * 2. The packet was DNAT'ed to a different device, either 322 * a non-bridged device or another bridge port group. 323 * The packet will need to be routed. 324 * 325 * The correct way of distinguishing between these two cases is to 326 * call ip_route_input() and to look at skb->dst->dev, which is 327 * changed to the destination device if ip_route_input() succeeds. 328 * 329 * Let's first consider the case that ip_route_input() succeeds: 330 * 331 * If the output device equals the logical bridge device the packet 332 * came in on, we can consider this bridging. The corresponding MAC 333 * address will be obtained in br_nf_pre_routing_finish_bridge. 334 * Otherwise, the packet is considered to be routed and we just 335 * change the destination MAC address so that the packet will 336 * later be passed up to the IP stack to be routed. For a redirected 337 * packet, ip_route_input() will give back the localhost as output device, 338 * which differs from the bridge device. 339 * 340 * Let's now consider the case that ip_route_input() fails: 341 * 342 * This can be because the destination address is martian, in which case 343 * the packet will be dropped. 344 * If IP forwarding is disabled, ip_route_input() will fail, while 345 * ip_route_output_key() can return success. The source 346 * address for ip_route_output_key() is set to zero, so ip_route_output_key() 347 * thinks we're handling a locally generated packet and won't care 348 * if IP forwarding is enabled. If the output device equals the logical bridge 349 * device, we proceed as if ip_route_input() succeeded. If it differs from the 350 * logical bridge port or if ip_route_output_key() fails we drop the packet. 351 */ 352 static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 353 { 354 struct net_device *dev = skb->dev; 355 struct iphdr *iph = ip_hdr(skb); 356 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 357 struct rtable *rt; 358 int err; 359 360 nf_bridge->frag_max_size = IPCB(skb)->frag_max_size; 361 362 if (nf_bridge->pkt_otherhost) { 363 skb->pkt_type = PACKET_OTHERHOST; 364 nf_bridge->pkt_otherhost = false; 365 } 366 nf_bridge->in_prerouting = 0; 367 if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) { 368 if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { 369 struct in_device *in_dev = __in_dev_get_rcu(dev); 370 371 /* If err equals -EHOSTUNREACH the error is due to a 372 * martian destination or due to the fact that 373 * forwarding is disabled. For most martian packets, 374 * ip_route_output_key() will fail. It won't fail for 2 types of 375 * martian destinations: loopback destinations and destination 376 * 0.0.0.0. In both cases the packet will be dropped because the 377 * destination is the loopback device and not the bridge. */ 378 if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev)) 379 goto free_skb; 380 381 rt = ip_route_output(net, iph->daddr, 0, 382 RT_TOS(iph->tos), 0); 383 if (!IS_ERR(rt)) { 384 /* - Bridged-and-DNAT'ed traffic doesn't 385 * require ip_forwarding. */ 386 if (rt->dst.dev == dev) { 387 skb_dst_drop(skb); 388 skb_dst_set(skb, &rt->dst); 389 goto bridged_dnat; 390 } 391 ip_rt_put(rt); 392 } 393 free_skb: 394 kfree_skb(skb); 395 return 0; 396 } else { 397 if (skb_dst(skb)->dev == dev) { 398 bridged_dnat: 399 skb->dev = nf_bridge->physindev; 400 nf_bridge_update_protocol(skb); 401 nf_bridge_push_encap_header(skb); 402 br_nf_hook_thresh(NF_BR_PRE_ROUTING, 403 net, sk, skb, skb->dev, 404 NULL, 405 br_nf_pre_routing_finish_bridge); 406 return 0; 407 } 408 ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); 409 skb->pkt_type = PACKET_HOST; 410 } 411 } else { 412 rt = bridge_parent_rtable(nf_bridge->physindev); 413 if (!rt) { 414 kfree_skb(skb); 415 return 0; 416 } 417 skb_dst_drop(skb); 418 skb_dst_set_noref(skb, &rt->dst); 419 } 420 421 skb->dev = nf_bridge->physindev; 422 nf_bridge_update_protocol(skb); 423 nf_bridge_push_encap_header(skb); 424 br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL, 425 br_handle_frame_finish); 426 return 0; 427 } 428 429 static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, 430 const struct net_device *dev, 431 const struct net *net) 432 { 433 struct net_device *vlan, *br; 434 struct brnf_net *brnet = net_generic(net, brnf_net_id); 435 436 br = bridge_parent(dev); 437 438 if (brnet->pass_vlan_indev == 0 || !skb_vlan_tag_present(skb)) 439 return br; 440 441 vlan = __vlan_find_dev_deep_rcu(br, skb->vlan_proto, 442 skb_vlan_tag_get(skb) & VLAN_VID_MASK); 443 444 return vlan ? vlan : br; 445 } 446 447 /* Some common code for IPv4/IPv6 */ 448 struct net_device *setup_pre_routing(struct sk_buff *skb, const struct net *net) 449 { 450 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 451 452 if (skb->pkt_type == PACKET_OTHERHOST) { 453 skb->pkt_type = PACKET_HOST; 454 nf_bridge->pkt_otherhost = true; 455 } 456 457 nf_bridge->in_prerouting = 1; 458 nf_bridge->physindev = skb->dev; 459 skb->dev = brnf_get_logical_dev(skb, skb->dev, net); 460 461 if (skb->protocol == htons(ETH_P_8021Q)) 462 nf_bridge->orig_proto = BRNF_PROTO_8021Q; 463 else if (skb->protocol == htons(ETH_P_PPP_SES)) 464 nf_bridge->orig_proto = BRNF_PROTO_PPPOE; 465 466 /* Must drop socket now because of tproxy. */ 467 skb_orphan(skb); 468 return skb->dev; 469 } 470 471 /* Direct IPv6 traffic to br_nf_pre_routing_ipv6. 472 * Replicate the checks that IPv4 does on packet reception. 473 * Set skb->dev to the bridge device (i.e. parent of the 474 * receiving device) to make netfilter happy, the REDIRECT 475 * target in particular. Save the original destination IP 476 * address to be able to detect DNAT afterwards. */ 477 static unsigned int br_nf_pre_routing(void *priv, 478 struct sk_buff *skb, 479 const struct nf_hook_state *state) 480 { 481 struct nf_bridge_info *nf_bridge; 482 struct net_bridge_port *p; 483 struct net_bridge *br; 484 __u32 len = nf_bridge_encap_header_len(skb); 485 struct brnf_net *brnet; 486 487 if (unlikely(!pskb_may_pull(skb, len))) 488 return NF_DROP; 489 490 p = br_port_get_rcu(state->in); 491 if (p == NULL) 492 return NF_DROP; 493 br = p->br; 494 495 brnet = net_generic(state->net, brnf_net_id); 496 if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) || 497 is_pppoe_ipv6(skb, state->net)) { 498 if (!brnet->call_ip6tables && 499 !br_opt_get(br, BROPT_NF_CALL_IP6TABLES)) 500 return NF_ACCEPT; 501 if (!ipv6_mod_enabled()) { 502 pr_warn_once("Module ipv6 is disabled, so call_ip6tables is not supported."); 503 return NF_DROP; 504 } 505 506 nf_bridge_pull_encap_header_rcsum(skb); 507 return br_nf_pre_routing_ipv6(priv, skb, state); 508 } 509 510 if (!brnet->call_iptables && !br_opt_get(br, BROPT_NF_CALL_IPTABLES)) 511 return NF_ACCEPT; 512 513 if (!IS_IP(skb) && !is_vlan_ip(skb, state->net) && 514 !is_pppoe_ip(skb, state->net)) 515 return NF_ACCEPT; 516 517 nf_bridge_pull_encap_header_rcsum(skb); 518 519 if (br_validate_ipv4(state->net, skb)) 520 return NF_DROP; 521 522 if (!nf_bridge_alloc(skb)) 523 return NF_DROP; 524 if (!setup_pre_routing(skb, state->net)) 525 return NF_DROP; 526 527 nf_bridge = nf_bridge_info_get(skb); 528 nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr; 529 530 skb->protocol = htons(ETH_P_IP); 531 skb->transport_header = skb->network_header + ip_hdr(skb)->ihl * 4; 532 533 NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->net, state->sk, skb, 534 skb->dev, NULL, 535 br_nf_pre_routing_finish); 536 537 return NF_STOLEN; 538 } 539 540 541 /* PF_BRIDGE/FORWARD *************************************************/ 542 static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 543 { 544 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 545 struct net_device *in; 546 547 if (!IS_ARP(skb) && !is_vlan_arp(skb, net)) { 548 549 if (skb->protocol == htons(ETH_P_IP)) 550 nf_bridge->frag_max_size = IPCB(skb)->frag_max_size; 551 552 if (skb->protocol == htons(ETH_P_IPV6)) 553 nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size; 554 555 in = nf_bridge->physindev; 556 if (nf_bridge->pkt_otherhost) { 557 skb->pkt_type = PACKET_OTHERHOST; 558 nf_bridge->pkt_otherhost = false; 559 } 560 nf_bridge_update_protocol(skb); 561 } else { 562 in = *((struct net_device **)(skb->cb)); 563 } 564 nf_bridge_push_encap_header(skb); 565 566 br_nf_hook_thresh(NF_BR_FORWARD, net, sk, skb, in, skb->dev, 567 br_forward_finish); 568 return 0; 569 } 570 571 572 /* This is the 'purely bridged' case. For IP, we pass the packet to 573 * netfilter with indev and outdev set to the bridge device, 574 * but we are still able to filter on the 'real' indev/outdev 575 * because of the physdev module. For ARP, indev and outdev are the 576 * bridge ports. */ 577 static unsigned int br_nf_forward_ip(void *priv, 578 struct sk_buff *skb, 579 const struct nf_hook_state *state) 580 { 581 struct nf_bridge_info *nf_bridge; 582 struct net_device *parent; 583 u_int8_t pf; 584 585 nf_bridge = nf_bridge_info_get(skb); 586 if (!nf_bridge) 587 return NF_ACCEPT; 588 589 /* Need exclusive nf_bridge_info since we might have multiple 590 * different physoutdevs. */ 591 if (!nf_bridge_unshare(skb)) 592 return NF_DROP; 593 594 nf_bridge = nf_bridge_info_get(skb); 595 if (!nf_bridge) 596 return NF_DROP; 597 598 parent = bridge_parent(state->out); 599 if (!parent) 600 return NF_DROP; 601 602 if (IS_IP(skb) || is_vlan_ip(skb, state->net) || 603 is_pppoe_ip(skb, state->net)) 604 pf = NFPROTO_IPV4; 605 else if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) || 606 is_pppoe_ipv6(skb, state->net)) 607 pf = NFPROTO_IPV6; 608 else 609 return NF_ACCEPT; 610 611 nf_bridge_pull_encap_header(skb); 612 613 if (skb->pkt_type == PACKET_OTHERHOST) { 614 skb->pkt_type = PACKET_HOST; 615 nf_bridge->pkt_otherhost = true; 616 } 617 618 if (pf == NFPROTO_IPV4) { 619 if (br_validate_ipv4(state->net, skb)) 620 return NF_DROP; 621 IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; 622 } 623 624 if (pf == NFPROTO_IPV6) { 625 if (br_validate_ipv6(state->net, skb)) 626 return NF_DROP; 627 IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; 628 } 629 630 nf_bridge->physoutdev = skb->dev; 631 if (pf == NFPROTO_IPV4) 632 skb->protocol = htons(ETH_P_IP); 633 else 634 skb->protocol = htons(ETH_P_IPV6); 635 636 NF_HOOK(pf, NF_INET_FORWARD, state->net, NULL, skb, 637 brnf_get_logical_dev(skb, state->in, state->net), 638 parent, br_nf_forward_finish); 639 640 return NF_STOLEN; 641 } 642 643 static unsigned int br_nf_forward_arp(void *priv, 644 struct sk_buff *skb, 645 const struct nf_hook_state *state) 646 { 647 struct net_bridge_port *p; 648 struct net_bridge *br; 649 struct net_device **d = (struct net_device **)(skb->cb); 650 struct brnf_net *brnet; 651 652 p = br_port_get_rcu(state->out); 653 if (p == NULL) 654 return NF_ACCEPT; 655 br = p->br; 656 657 brnet = net_generic(state->net, brnf_net_id); 658 if (!brnet->call_arptables && !br_opt_get(br, BROPT_NF_CALL_ARPTABLES)) 659 return NF_ACCEPT; 660 661 if (!IS_ARP(skb)) { 662 if (!is_vlan_arp(skb, state->net)) 663 return NF_ACCEPT; 664 nf_bridge_pull_encap_header(skb); 665 } 666 667 if (unlikely(!pskb_may_pull(skb, sizeof(struct arphdr)))) 668 return NF_DROP; 669 670 if (arp_hdr(skb)->ar_pln != 4) { 671 if (is_vlan_arp(skb, state->net)) 672 nf_bridge_push_encap_header(skb); 673 return NF_ACCEPT; 674 } 675 *d = state->in; 676 NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, state->net, state->sk, skb, 677 state->in, state->out, br_nf_forward_finish); 678 679 return NF_STOLEN; 680 } 681 682 static int br_nf_push_frag_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) 683 { 684 struct brnf_frag_data *data; 685 int err; 686 687 data = this_cpu_ptr(&brnf_frag_data_storage); 688 err = skb_cow_head(skb, data->size); 689 690 if (err) { 691 kfree_skb(skb); 692 return 0; 693 } 694 695 if (data->vlan_proto) 696 __vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci); 697 698 skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size); 699 __skb_push(skb, data->encap_size); 700 701 nf_bridge_info_free(skb); 702 return br_dev_queue_push_xmit(net, sk, skb); 703 } 704 705 static int 706 br_nf_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, 707 int (*output)(struct net *, struct sock *, struct sk_buff *)) 708 { 709 unsigned int mtu = ip_skb_dst_mtu(sk, skb); 710 struct iphdr *iph = ip_hdr(skb); 711 712 if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) || 713 (IPCB(skb)->frag_max_size && 714 IPCB(skb)->frag_max_size > mtu))) { 715 IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); 716 kfree_skb(skb); 717 return -EMSGSIZE; 718 } 719 720 return ip_do_fragment(net, sk, skb, output); 721 } 722 723 static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) 724 { 725 const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 726 727 if (nf_bridge->orig_proto == BRNF_PROTO_PPPOE) 728 return PPPOE_SES_HLEN; 729 return 0; 730 } 731 732 static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) 733 { 734 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 735 unsigned int mtu, mtu_reserved; 736 737 mtu_reserved = nf_bridge_mtu_reduction(skb); 738 mtu = skb->dev->mtu; 739 740 if (nf_bridge->pkt_otherhost) { 741 skb->pkt_type = PACKET_OTHERHOST; 742 nf_bridge->pkt_otherhost = false; 743 } 744 745 if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu) 746 mtu = nf_bridge->frag_max_size; 747 748 nf_bridge_update_protocol(skb); 749 nf_bridge_push_encap_header(skb); 750 751 if (skb_is_gso(skb) || skb->len + mtu_reserved <= mtu) { 752 nf_bridge_info_free(skb); 753 return br_dev_queue_push_xmit(net, sk, skb); 754 } 755 756 /* This is wrong! We should preserve the original fragment 757 * boundaries by preserving frag_list rather than refragmenting. 758 */ 759 if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) && 760 skb->protocol == htons(ETH_P_IP)) { 761 struct brnf_frag_data *data; 762 763 if (br_validate_ipv4(net, skb)) 764 goto drop; 765 766 IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; 767 768 data = this_cpu_ptr(&brnf_frag_data_storage); 769 770 if (skb_vlan_tag_present(skb)) { 771 data->vlan_tci = skb->vlan_tci; 772 data->vlan_proto = skb->vlan_proto; 773 } else { 774 data->vlan_proto = 0; 775 } 776 777 data->encap_size = nf_bridge_encap_header_len(skb); 778 data->size = ETH_HLEN + data->encap_size; 779 780 skb_copy_from_linear_data_offset(skb, -data->size, data->mac, 781 data->size); 782 783 return br_nf_ip_fragment(net, sk, skb, br_nf_push_frag_xmit); 784 } 785 if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) && 786 skb->protocol == htons(ETH_P_IPV6)) { 787 const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); 788 struct brnf_frag_data *data; 789 790 if (br_validate_ipv6(net, skb)) 791 goto drop; 792 793 IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; 794 795 data = this_cpu_ptr(&brnf_frag_data_storage); 796 data->encap_size = nf_bridge_encap_header_len(skb); 797 data->size = ETH_HLEN + data->encap_size; 798 799 skb_copy_from_linear_data_offset(skb, -data->size, data->mac, 800 data->size); 801 802 if (v6ops) 803 return v6ops->fragment(net, sk, skb, br_nf_push_frag_xmit); 804 805 kfree_skb(skb); 806 return -EMSGSIZE; 807 } 808 nf_bridge_info_free(skb); 809 return br_dev_queue_push_xmit(net, sk, skb); 810 drop: 811 kfree_skb(skb); 812 return 0; 813 } 814 815 /* PF_BRIDGE/POST_ROUTING ********************************************/ 816 static unsigned int br_nf_post_routing(void *priv, 817 struct sk_buff *skb, 818 const struct nf_hook_state *state) 819 { 820 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 821 struct net_device *realoutdev = bridge_parent(skb->dev); 822 u_int8_t pf; 823 824 /* if nf_bridge is set, but ->physoutdev is NULL, this packet came in 825 * on a bridge, but was delivered locally and is now being routed: 826 * 827 * POST_ROUTING was already invoked from the ip stack. 828 */ 829 if (!nf_bridge || !nf_bridge->physoutdev) 830 return NF_ACCEPT; 831 832 if (!realoutdev) 833 return NF_DROP; 834 835 if (IS_IP(skb) || is_vlan_ip(skb, state->net) || 836 is_pppoe_ip(skb, state->net)) 837 pf = NFPROTO_IPV4; 838 else if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) || 839 is_pppoe_ipv6(skb, state->net)) 840 pf = NFPROTO_IPV6; 841 else 842 return NF_ACCEPT; 843 844 if (skb->pkt_type == PACKET_OTHERHOST) { 845 skb->pkt_type = PACKET_HOST; 846 nf_bridge->pkt_otherhost = true; 847 } 848 849 nf_bridge_pull_encap_header(skb); 850 if (pf == NFPROTO_IPV4) 851 skb->protocol = htons(ETH_P_IP); 852 else 853 skb->protocol = htons(ETH_P_IPV6); 854 855 NF_HOOK(pf, NF_INET_POST_ROUTING, state->net, state->sk, skb, 856 NULL, realoutdev, 857 br_nf_dev_queue_xmit); 858 859 return NF_STOLEN; 860 } 861 862 /* IP/SABOTAGE *****************************************************/ 863 /* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING 864 * for the second time. */ 865 static unsigned int ip_sabotage_in(void *priv, 866 struct sk_buff *skb, 867 const struct nf_hook_state *state) 868 { 869 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 870 871 if (nf_bridge && !nf_bridge->in_prerouting && 872 !netif_is_l3_master(skb->dev) && 873 !netif_is_l3_slave(skb->dev)) { 874 state->okfn(state->net, state->sk, skb); 875 return NF_STOLEN; 876 } 877 878 return NF_ACCEPT; 879 } 880 881 /* This is called when br_netfilter has called into iptables/netfilter, 882 * and DNAT has taken place on a bridge-forwarded packet. 883 * 884 * neigh->output has created a new MAC header, with local br0 MAC 885 * as saddr. 886 * 887 * This restores the original MAC saddr of the bridged packet 888 * before invoking bridge forward logic to transmit the packet. 889 */ 890 static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) 891 { 892 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 893 894 skb_pull(skb, ETH_HLEN); 895 nf_bridge->bridged_dnat = 0; 896 897 BUILD_BUG_ON(sizeof(nf_bridge->neigh_header) != (ETH_HLEN - ETH_ALEN)); 898 899 skb_copy_to_linear_data_offset(skb, -(ETH_HLEN - ETH_ALEN), 900 nf_bridge->neigh_header, 901 ETH_HLEN - ETH_ALEN); 902 skb->dev = nf_bridge->physindev; 903 904 nf_bridge->physoutdev = NULL; 905 br_handle_frame_finish(dev_net(skb->dev), NULL, skb); 906 } 907 908 static int br_nf_dev_xmit(struct sk_buff *skb) 909 { 910 const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 911 912 if (nf_bridge && nf_bridge->bridged_dnat) { 913 br_nf_pre_routing_finish_bridge_slow(skb); 914 return 1; 915 } 916 return 0; 917 } 918 919 static const struct nf_br_ops br_ops = { 920 .br_dev_xmit_hook = br_nf_dev_xmit, 921 }; 922 923 /* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because 924 * br_dev_queue_push_xmit is called afterwards */ 925 static const struct nf_hook_ops br_nf_ops[] = { 926 { 927 .hook = br_nf_pre_routing, 928 .pf = NFPROTO_BRIDGE, 929 .hooknum = NF_BR_PRE_ROUTING, 930 .priority = NF_BR_PRI_BRNF, 931 }, 932 { 933 .hook = br_nf_forward_ip, 934 .pf = NFPROTO_BRIDGE, 935 .hooknum = NF_BR_FORWARD, 936 .priority = NF_BR_PRI_BRNF - 1, 937 }, 938 { 939 .hook = br_nf_forward_arp, 940 .pf = NFPROTO_BRIDGE, 941 .hooknum = NF_BR_FORWARD, 942 .priority = NF_BR_PRI_BRNF, 943 }, 944 { 945 .hook = br_nf_post_routing, 946 .pf = NFPROTO_BRIDGE, 947 .hooknum = NF_BR_POST_ROUTING, 948 .priority = NF_BR_PRI_LAST, 949 }, 950 { 951 .hook = ip_sabotage_in, 952 .pf = NFPROTO_IPV4, 953 .hooknum = NF_INET_PRE_ROUTING, 954 .priority = NF_IP_PRI_FIRST, 955 }, 956 { 957 .hook = ip_sabotage_in, 958 .pf = NFPROTO_IPV6, 959 .hooknum = NF_INET_PRE_ROUTING, 960 .priority = NF_IP6_PRI_FIRST, 961 }, 962 }; 963 964 static int brnf_device_event(struct notifier_block *unused, unsigned long event, 965 void *ptr) 966 { 967 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 968 struct brnf_net *brnet; 969 struct net *net; 970 int ret; 971 972 if (event != NETDEV_REGISTER || !netif_is_bridge_master(dev)) 973 return NOTIFY_DONE; 974 975 ASSERT_RTNL(); 976 977 net = dev_net(dev); 978 brnet = net_generic(net, brnf_net_id); 979 if (brnet->enabled) 980 return NOTIFY_OK; 981 982 ret = nf_register_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops)); 983 if (ret) 984 return NOTIFY_BAD; 985 986 brnet->enabled = true; 987 return NOTIFY_OK; 988 } 989 990 static struct notifier_block brnf_notifier __read_mostly = { 991 .notifier_call = brnf_device_event, 992 }; 993 994 /* recursively invokes nf_hook_slow (again), skipping already-called 995 * hooks (< NF_BR_PRI_BRNF). 996 * 997 * Called with rcu read lock held. 998 */ 999 int br_nf_hook_thresh(unsigned int hook, struct net *net, 1000 struct sock *sk, struct sk_buff *skb, 1001 struct net_device *indev, 1002 struct net_device *outdev, 1003 int (*okfn)(struct net *, struct sock *, 1004 struct sk_buff *)) 1005 { 1006 const struct nf_hook_entries *e; 1007 struct nf_hook_state state; 1008 struct nf_hook_ops **ops; 1009 unsigned int i; 1010 int ret; 1011 1012 e = rcu_dereference(net->nf.hooks_bridge[hook]); 1013 if (!e) 1014 return okfn(net, sk, skb); 1015 1016 ops = nf_hook_entries_get_hook_ops(e); 1017 for (i = 0; i < e->num_hook_entries; i++) { 1018 /* These hooks have already been called */ 1019 if (ops[i]->priority < NF_BR_PRI_BRNF) 1020 continue; 1021 1022 /* These hooks have not been called yet, run them. */ 1023 if (ops[i]->priority > NF_BR_PRI_BRNF) 1024 break; 1025 1026 /* take a closer look at NF_BR_PRI_BRNF. */ 1027 if (ops[i]->hook == br_nf_pre_routing) { 1028 /* This hook diverted the skb to this function, 1029 * hooks after this have not been run yet. 1030 */ 1031 i++; 1032 break; 1033 } 1034 } 1035 1036 nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev, 1037 sk, net, okfn); 1038 1039 ret = nf_hook_slow(skb, &state, e, i); 1040 if (ret == 1) 1041 ret = okfn(net, sk, skb); 1042 1043 return ret; 1044 } 1045 1046 #ifdef CONFIG_SYSCTL 1047 static 1048 int brnf_sysctl_call_tables(struct ctl_table *ctl, int write, 1049 void *buffer, size_t *lenp, loff_t *ppos) 1050 { 1051 int ret; 1052 1053 ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 1054 1055 if (write && *(int *)(ctl->data)) 1056 *(int *)(ctl->data) = 1; 1057 return ret; 1058 } 1059 1060 static struct ctl_table brnf_table[] = { 1061 { 1062 .procname = "bridge-nf-call-arptables", 1063 .maxlen = sizeof(int), 1064 .mode = 0644, 1065 .proc_handler = brnf_sysctl_call_tables, 1066 }, 1067 { 1068 .procname = "bridge-nf-call-iptables", 1069 .maxlen = sizeof(int), 1070 .mode = 0644, 1071 .proc_handler = brnf_sysctl_call_tables, 1072 }, 1073 { 1074 .procname = "bridge-nf-call-ip6tables", 1075 .maxlen = sizeof(int), 1076 .mode = 0644, 1077 .proc_handler = brnf_sysctl_call_tables, 1078 }, 1079 { 1080 .procname = "bridge-nf-filter-vlan-tagged", 1081 .maxlen = sizeof(int), 1082 .mode = 0644, 1083 .proc_handler = brnf_sysctl_call_tables, 1084 }, 1085 { 1086 .procname = "bridge-nf-filter-pppoe-tagged", 1087 .maxlen = sizeof(int), 1088 .mode = 0644, 1089 .proc_handler = brnf_sysctl_call_tables, 1090 }, 1091 { 1092 .procname = "bridge-nf-pass-vlan-input-dev", 1093 .maxlen = sizeof(int), 1094 .mode = 0644, 1095 .proc_handler = brnf_sysctl_call_tables, 1096 }, 1097 { } 1098 }; 1099 1100 static inline void br_netfilter_sysctl_default(struct brnf_net *brnf) 1101 { 1102 brnf->call_iptables = 1; 1103 brnf->call_ip6tables = 1; 1104 brnf->call_arptables = 1; 1105 brnf->filter_vlan_tagged = 0; 1106 brnf->filter_pppoe_tagged = 0; 1107 brnf->pass_vlan_indev = 0; 1108 } 1109 1110 static int br_netfilter_sysctl_init_net(struct net *net) 1111 { 1112 struct ctl_table *table = brnf_table; 1113 struct brnf_net *brnet; 1114 1115 if (!net_eq(net, &init_net)) { 1116 table = kmemdup(table, sizeof(brnf_table), GFP_KERNEL); 1117 if (!table) 1118 return -ENOMEM; 1119 } 1120 1121 brnet = net_generic(net, brnf_net_id); 1122 table[0].data = &brnet->call_arptables; 1123 table[1].data = &brnet->call_iptables; 1124 table[2].data = &brnet->call_ip6tables; 1125 table[3].data = &brnet->filter_vlan_tagged; 1126 table[4].data = &brnet->filter_pppoe_tagged; 1127 table[5].data = &brnet->pass_vlan_indev; 1128 1129 br_netfilter_sysctl_default(brnet); 1130 1131 brnet->ctl_hdr = register_net_sysctl(net, "net/bridge", table); 1132 if (!brnet->ctl_hdr) { 1133 if (!net_eq(net, &init_net)) 1134 kfree(table); 1135 1136 return -ENOMEM; 1137 } 1138 1139 return 0; 1140 } 1141 1142 static void br_netfilter_sysctl_exit_net(struct net *net, 1143 struct brnf_net *brnet) 1144 { 1145 struct ctl_table *table = brnet->ctl_hdr->ctl_table_arg; 1146 1147 unregister_net_sysctl_table(brnet->ctl_hdr); 1148 if (!net_eq(net, &init_net)) 1149 kfree(table); 1150 } 1151 1152 static int __net_init brnf_init_net(struct net *net) 1153 { 1154 return br_netfilter_sysctl_init_net(net); 1155 } 1156 #endif 1157 1158 static void __net_exit brnf_exit_net(struct net *net) 1159 { 1160 struct brnf_net *brnet; 1161 1162 brnet = net_generic(net, brnf_net_id); 1163 if (brnet->enabled) { 1164 nf_unregister_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops)); 1165 brnet->enabled = false; 1166 } 1167 1168 #ifdef CONFIG_SYSCTL 1169 br_netfilter_sysctl_exit_net(net, brnet); 1170 #endif 1171 } 1172 1173 static struct pernet_operations brnf_net_ops __read_mostly = { 1174 #ifdef CONFIG_SYSCTL 1175 .init = brnf_init_net, 1176 #endif 1177 .exit = brnf_exit_net, 1178 .id = &brnf_net_id, 1179 .size = sizeof(struct brnf_net), 1180 }; 1181 1182 static int __init br_netfilter_init(void) 1183 { 1184 int ret; 1185 1186 ret = register_pernet_subsys(&brnf_net_ops); 1187 if (ret < 0) 1188 return ret; 1189 1190 ret = register_netdevice_notifier(&brnf_notifier); 1191 if (ret < 0) { 1192 unregister_pernet_subsys(&brnf_net_ops); 1193 return ret; 1194 } 1195 1196 RCU_INIT_POINTER(nf_br_ops, &br_ops); 1197 printk(KERN_NOTICE "Bridge firewalling registered\n"); 1198 return 0; 1199 } 1200 1201 static void __exit br_netfilter_fini(void) 1202 { 1203 RCU_INIT_POINTER(nf_br_ops, NULL); 1204 unregister_netdevice_notifier(&brnf_notifier); 1205 unregister_pernet_subsys(&brnf_net_ops); 1206 } 1207 1208 module_init(br_netfilter_init); 1209 module_exit(br_netfilter_fini); 1210 1211 MODULE_LICENSE("GPL"); 1212 MODULE_AUTHOR("Lennert Buytenhek <buytenh@gnu.org>"); 1213 MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>"); 1214 MODULE_DESCRIPTION("Linux ethernet netfilter firewall bridge"); 1215