1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Handle firewalling 4 * Linux ethernet bridge 5 * 6 * Authors: 7 * Lennert Buytenhek <buytenh@gnu.org> 8 * Bart De Schuymer <bdschuym@pandora.be> 9 * 10 * Lennert dedicates this file to Kerstin Wurdinger. 11 */ 12 13 #include <linux/module.h> 14 #include <linux/kernel.h> 15 #include <linux/slab.h> 16 #include <linux/ip.h> 17 #include <linux/netdevice.h> 18 #include <linux/skbuff.h> 19 #include <linux/if_arp.h> 20 #include <linux/if_ether.h> 21 #include <linux/if_vlan.h> 22 #include <linux/if_pppox.h> 23 #include <linux/ppp_defs.h> 24 #include <linux/netfilter_bridge.h> 25 #include <uapi/linux/netfilter_bridge.h> 26 #include <linux/netfilter_ipv4.h> 27 #include <linux/netfilter_ipv6.h> 28 #include <linux/netfilter_arp.h> 29 #include <linux/in_route.h> 30 #include <linux/rculist.h> 31 #include <linux/inetdevice.h> 32 33 #include <net/ip.h> 34 #include <net/ipv6.h> 35 #include <net/addrconf.h> 36 #include <net/route.h> 37 #include <net/netfilter/br_netfilter.h> 38 #include <net/netns/generic.h> 39 40 #include <linux/uaccess.h> 41 #include "br_private.h" 42 #ifdef CONFIG_SYSCTL 43 #include <linux/sysctl.h> 44 #endif 45 46 static unsigned int brnf_net_id __read_mostly; 47 48 struct brnf_net { 49 bool enabled; 50 51 #ifdef CONFIG_SYSCTL 52 struct ctl_table_header *ctl_hdr; 53 #endif 54 55 /* default value is 1 */ 56 int call_iptables; 57 int call_ip6tables; 58 int call_arptables; 59 60 /* default value is 0 */ 61 int filter_vlan_tagged; 62 int filter_pppoe_tagged; 63 int pass_vlan_indev; 64 }; 65 66 #define IS_IP(skb) \ 67 (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IP)) 68 69 #define IS_IPV6(skb) \ 70 (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IPV6)) 71 72 #define IS_ARP(skb) \ 73 (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_ARP)) 74 75 static inline __be16 vlan_proto(const struct sk_buff *skb) 76 { 77 if (skb_vlan_tag_present(skb)) 78 return skb->protocol; 79 else if (skb->protocol == htons(ETH_P_8021Q)) 80 return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; 81 else 82 return 0; 83 } 84 85 static inline bool is_vlan_ip(const struct sk_buff *skb, const struct net *net) 86 { 87 struct brnf_net *brnet = net_generic(net, brnf_net_id); 88 89 return vlan_proto(skb) == htons(ETH_P_IP) && brnet->filter_vlan_tagged; 90 } 91 92 static inline bool is_vlan_ipv6(const struct sk_buff *skb, 93 const struct net *net) 94 { 95 struct brnf_net *brnet = net_generic(net, brnf_net_id); 96 97 return vlan_proto(skb) == htons(ETH_P_IPV6) && 98 brnet->filter_vlan_tagged; 99 } 100 101 static inline bool is_vlan_arp(const struct sk_buff *skb, const struct net *net) 102 { 103 struct brnf_net *brnet = net_generic(net, brnf_net_id); 104 105 return vlan_proto(skb) == htons(ETH_P_ARP) && brnet->filter_vlan_tagged; 106 } 107 108 static inline __be16 pppoe_proto(const struct sk_buff *skb) 109 { 110 return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN + 111 sizeof(struct pppoe_hdr))); 112 } 113 114 static inline bool is_pppoe_ip(const struct sk_buff *skb, const struct net *net) 115 { 116 struct brnf_net *brnet = net_generic(net, brnf_net_id); 117 118 return skb->protocol == htons(ETH_P_PPP_SES) && 119 pppoe_proto(skb) == htons(PPP_IP) && brnet->filter_pppoe_tagged; 120 } 121 122 static inline bool is_pppoe_ipv6(const struct sk_buff *skb, 123 const struct net *net) 124 { 125 struct brnf_net *brnet = net_generic(net, brnf_net_id); 126 127 return skb->protocol == htons(ETH_P_PPP_SES) && 128 pppoe_proto(skb) == htons(PPP_IPV6) && 129 brnet->filter_pppoe_tagged; 130 } 131 132 /* largest possible L2 header, see br_nf_dev_queue_xmit() */ 133 #define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN) 134 135 struct brnf_frag_data { 136 char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH]; 137 u8 encap_size; 138 u8 size; 139 u16 vlan_tci; 140 __be16 vlan_proto; 141 }; 142 143 static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage); 144 145 static void nf_bridge_info_free(struct sk_buff *skb) 146 { 147 skb_ext_del(skb, SKB_EXT_BRIDGE_NF); 148 } 149 150 static inline struct net_device *bridge_parent(const struct net_device *dev) 151 { 152 struct net_bridge_port *port; 153 154 port = br_port_get_rcu(dev); 155 return port ? port->br->dev : NULL; 156 } 157 158 static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb) 159 { 160 return skb_ext_add(skb, SKB_EXT_BRIDGE_NF); 161 } 162 163 unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb) 164 { 165 switch (skb->protocol) { 166 case __cpu_to_be16(ETH_P_8021Q): 167 return VLAN_HLEN; 168 case __cpu_to_be16(ETH_P_PPP_SES): 169 return PPPOE_SES_HLEN; 170 default: 171 return 0; 172 } 173 } 174 175 static inline void nf_bridge_pull_encap_header(struct sk_buff *skb) 176 { 177 unsigned int len = nf_bridge_encap_header_len(skb); 178 179 skb_pull(skb, len); 180 skb->network_header += len; 181 } 182 183 static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb) 184 { 185 unsigned int len = nf_bridge_encap_header_len(skb); 186 187 skb_pull_rcsum(skb, len); 188 skb->network_header += len; 189 } 190 191 /* When handing a packet over to the IP layer 192 * check whether we have a skb that is in the 193 * expected format 194 */ 195 196 static int br_validate_ipv4(struct net *net, struct sk_buff *skb) 197 { 198 const struct iphdr *iph; 199 u32 len; 200 201 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 202 goto inhdr_error; 203 204 iph = ip_hdr(skb); 205 206 /* Basic sanity checks */ 207 if (iph->ihl < 5 || iph->version != 4) 208 goto inhdr_error; 209 210 if (!pskb_may_pull(skb, iph->ihl*4)) 211 goto inhdr_error; 212 213 iph = ip_hdr(skb); 214 if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) 215 goto csum_error; 216 217 len = skb_ip_totlen(skb); 218 if (skb->len < len) { 219 __IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS); 220 goto drop; 221 } else if (len < (iph->ihl*4)) 222 goto inhdr_error; 223 224 if (pskb_trim_rcsum(skb, len)) { 225 __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS); 226 goto drop; 227 } 228 229 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); 230 /* We should really parse IP options here but until 231 * somebody who actually uses IP options complains to 232 * us we'll just silently ignore the options because 233 * we're lazy! 234 */ 235 return 0; 236 237 csum_error: 238 __IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS); 239 inhdr_error: 240 __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); 241 drop: 242 return -1; 243 } 244 245 void nf_bridge_update_protocol(struct sk_buff *skb) 246 { 247 const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 248 249 switch (nf_bridge->orig_proto) { 250 case BRNF_PROTO_8021Q: 251 skb->protocol = htons(ETH_P_8021Q); 252 break; 253 case BRNF_PROTO_PPPOE: 254 skb->protocol = htons(ETH_P_PPP_SES); 255 break; 256 case BRNF_PROTO_UNCHANGED: 257 break; 258 } 259 } 260 261 /* Obtain the correct destination MAC address, while preserving the original 262 * source MAC address. If we already know this address, we just copy it. If we 263 * don't, we use the neighbour framework to find out. In both cases, we make 264 * sure that br_handle_frame_finish() is called afterwards. 265 */ 266 int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_buff *skb) 267 { 268 struct neighbour *neigh; 269 struct dst_entry *dst; 270 271 skb->dev = bridge_parent(skb->dev); 272 if (!skb->dev) 273 goto free_skb; 274 dst = skb_dst(skb); 275 neigh = dst_neigh_lookup_skb(dst, skb); 276 if (neigh) { 277 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 278 int ret; 279 280 if ((READ_ONCE(neigh->nud_state) & NUD_CONNECTED) && 281 READ_ONCE(neigh->hh.hh_len)) { 282 neigh_hh_bridge(&neigh->hh, skb); 283 skb->dev = nf_bridge->physindev; 284 ret = br_handle_frame_finish(net, sk, skb); 285 } else { 286 /* the neighbour function below overwrites the complete 287 * MAC header, so we save the Ethernet source address and 288 * protocol number. 289 */ 290 skb_copy_from_linear_data_offset(skb, 291 -(ETH_HLEN-ETH_ALEN), 292 nf_bridge->neigh_header, 293 ETH_HLEN-ETH_ALEN); 294 /* tell br_dev_xmit to continue with forwarding */ 295 nf_bridge->bridged_dnat = 1; 296 /* FIXME Need to refragment */ 297 ret = READ_ONCE(neigh->output)(neigh, skb); 298 } 299 neigh_release(neigh); 300 return ret; 301 } 302 free_skb: 303 kfree_skb(skb); 304 return 0; 305 } 306 307 static inline bool 308 br_nf_ipv4_daddr_was_changed(const struct sk_buff *skb, 309 const struct nf_bridge_info *nf_bridge) 310 { 311 return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr; 312 } 313 314 /* This requires some explaining. If DNAT has taken place, 315 * we will need to fix up the destination Ethernet address. 316 * This is also true when SNAT takes place (for the reply direction). 317 * 318 * There are two cases to consider: 319 * 1. The packet was DNAT'ed to a device in the same bridge 320 * port group as it was received on. We can still bridge 321 * the packet. 322 * 2. The packet was DNAT'ed to a different device, either 323 * a non-bridged device or another bridge port group. 324 * The packet will need to be routed. 325 * 326 * The correct way of distinguishing between these two cases is to 327 * call ip_route_input() and to look at skb->dst->dev, which is 328 * changed to the destination device if ip_route_input() succeeds. 329 * 330 * Let's first consider the case that ip_route_input() succeeds: 331 * 332 * If the output device equals the logical bridge device the packet 333 * came in on, we can consider this bridging. The corresponding MAC 334 * address will be obtained in br_nf_pre_routing_finish_bridge. 335 * Otherwise, the packet is considered to be routed and we just 336 * change the destination MAC address so that the packet will 337 * later be passed up to the IP stack to be routed. For a redirected 338 * packet, ip_route_input() will give back the localhost as output device, 339 * which differs from the bridge device. 340 * 341 * Let's now consider the case that ip_route_input() fails: 342 * 343 * This can be because the destination address is martian, in which case 344 * the packet will be dropped. 345 * If IP forwarding is disabled, ip_route_input() will fail, while 346 * ip_route_output_key() can return success. The source 347 * address for ip_route_output_key() is set to zero, so ip_route_output_key() 348 * thinks we're handling a locally generated packet and won't care 349 * if IP forwarding is enabled. If the output device equals the logical bridge 350 * device, we proceed as if ip_route_input() succeeded. If it differs from the 351 * logical bridge port or if ip_route_output_key() fails we drop the packet. 352 */ 353 static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 354 { 355 struct net_device *dev = skb->dev; 356 struct iphdr *iph = ip_hdr(skb); 357 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 358 struct rtable *rt; 359 int err; 360 361 nf_bridge->frag_max_size = IPCB(skb)->frag_max_size; 362 363 if (nf_bridge->pkt_otherhost) { 364 skb->pkt_type = PACKET_OTHERHOST; 365 nf_bridge->pkt_otherhost = false; 366 } 367 nf_bridge->in_prerouting = 0; 368 if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) { 369 if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { 370 struct in_device *in_dev = __in_dev_get_rcu(dev); 371 372 /* If err equals -EHOSTUNREACH the error is due to a 373 * martian destination or due to the fact that 374 * forwarding is disabled. For most martian packets, 375 * ip_route_output_key() will fail. It won't fail for 2 types of 376 * martian destinations: loopback destinations and destination 377 * 0.0.0.0. In both cases the packet will be dropped because the 378 * destination is the loopback device and not the bridge. */ 379 if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev)) 380 goto free_skb; 381 382 rt = ip_route_output(net, iph->daddr, 0, 383 RT_TOS(iph->tos), 0); 384 if (!IS_ERR(rt)) { 385 /* - Bridged-and-DNAT'ed traffic doesn't 386 * require ip_forwarding. */ 387 if (rt->dst.dev == dev) { 388 skb_dst_drop(skb); 389 skb_dst_set(skb, &rt->dst); 390 goto bridged_dnat; 391 } 392 ip_rt_put(rt); 393 } 394 free_skb: 395 kfree_skb(skb); 396 return 0; 397 } else { 398 if (skb_dst(skb)->dev == dev) { 399 bridged_dnat: 400 skb->dev = nf_bridge->physindev; 401 nf_bridge_update_protocol(skb); 402 nf_bridge_push_encap_header(skb); 403 br_nf_hook_thresh(NF_BR_PRE_ROUTING, 404 net, sk, skb, skb->dev, 405 NULL, 406 br_nf_pre_routing_finish_bridge); 407 return 0; 408 } 409 ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); 410 skb->pkt_type = PACKET_HOST; 411 } 412 } else { 413 rt = bridge_parent_rtable(nf_bridge->physindev); 414 if (!rt) { 415 kfree_skb(skb); 416 return 0; 417 } 418 skb_dst_drop(skb); 419 skb_dst_set_noref(skb, &rt->dst); 420 } 421 422 skb->dev = nf_bridge->physindev; 423 nf_bridge_update_protocol(skb); 424 nf_bridge_push_encap_header(skb); 425 br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL, 426 br_handle_frame_finish); 427 return 0; 428 } 429 430 static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, 431 const struct net_device *dev, 432 const struct net *net) 433 { 434 struct net_device *vlan, *br; 435 struct brnf_net *brnet = net_generic(net, brnf_net_id); 436 437 br = bridge_parent(dev); 438 439 if (brnet->pass_vlan_indev == 0 || !skb_vlan_tag_present(skb)) 440 return br; 441 442 vlan = __vlan_find_dev_deep_rcu(br, skb->vlan_proto, 443 skb_vlan_tag_get(skb) & VLAN_VID_MASK); 444 445 return vlan ? vlan : br; 446 } 447 448 /* Some common code for IPv4/IPv6 */ 449 struct net_device *setup_pre_routing(struct sk_buff *skb, const struct net *net) 450 { 451 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 452 453 if (skb->pkt_type == PACKET_OTHERHOST) { 454 skb->pkt_type = PACKET_HOST; 455 nf_bridge->pkt_otherhost = true; 456 } 457 458 nf_bridge->in_prerouting = 1; 459 nf_bridge->physindev = skb->dev; 460 skb->dev = brnf_get_logical_dev(skb, skb->dev, net); 461 462 if (skb->protocol == htons(ETH_P_8021Q)) 463 nf_bridge->orig_proto = BRNF_PROTO_8021Q; 464 else if (skb->protocol == htons(ETH_P_PPP_SES)) 465 nf_bridge->orig_proto = BRNF_PROTO_PPPOE; 466 467 /* Must drop socket now because of tproxy. */ 468 skb_orphan(skb); 469 return skb->dev; 470 } 471 472 /* Direct IPv6 traffic to br_nf_pre_routing_ipv6. 473 * Replicate the checks that IPv4 does on packet reception. 474 * Set skb->dev to the bridge device (i.e. parent of the 475 * receiving device) to make netfilter happy, the REDIRECT 476 * target in particular. Save the original destination IP 477 * address to be able to detect DNAT afterwards. */ 478 static unsigned int br_nf_pre_routing(void *priv, 479 struct sk_buff *skb, 480 const struct nf_hook_state *state) 481 { 482 struct nf_bridge_info *nf_bridge; 483 struct net_bridge_port *p; 484 struct net_bridge *br; 485 __u32 len = nf_bridge_encap_header_len(skb); 486 struct brnf_net *brnet; 487 488 if (unlikely(!pskb_may_pull(skb, len))) 489 return NF_DROP; 490 491 p = br_port_get_rcu(state->in); 492 if (p == NULL) 493 return NF_DROP; 494 br = p->br; 495 496 brnet = net_generic(state->net, brnf_net_id); 497 if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) || 498 is_pppoe_ipv6(skb, state->net)) { 499 if (!brnet->call_ip6tables && 500 !br_opt_get(br, BROPT_NF_CALL_IP6TABLES)) 501 return NF_ACCEPT; 502 if (!ipv6_mod_enabled()) { 503 pr_warn_once("Module ipv6 is disabled, so call_ip6tables is not supported."); 504 return NF_DROP; 505 } 506 507 nf_bridge_pull_encap_header_rcsum(skb); 508 return br_nf_pre_routing_ipv6(priv, skb, state); 509 } 510 511 if (!brnet->call_iptables && !br_opt_get(br, BROPT_NF_CALL_IPTABLES)) 512 return NF_ACCEPT; 513 514 if (!IS_IP(skb) && !is_vlan_ip(skb, state->net) && 515 !is_pppoe_ip(skb, state->net)) 516 return NF_ACCEPT; 517 518 nf_bridge_pull_encap_header_rcsum(skb); 519 520 if (br_validate_ipv4(state->net, skb)) 521 return NF_DROP; 522 523 if (!nf_bridge_alloc(skb)) 524 return NF_DROP; 525 if (!setup_pre_routing(skb, state->net)) 526 return NF_DROP; 527 528 nf_bridge = nf_bridge_info_get(skb); 529 nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr; 530 531 skb->protocol = htons(ETH_P_IP); 532 skb->transport_header = skb->network_header + ip_hdr(skb)->ihl * 4; 533 534 NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->net, state->sk, skb, 535 skb->dev, NULL, 536 br_nf_pre_routing_finish); 537 538 return NF_STOLEN; 539 } 540 541 542 /* PF_BRIDGE/FORWARD *************************************************/ 543 static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 544 { 545 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 546 struct net_device *in; 547 548 if (!IS_ARP(skb) && !is_vlan_arp(skb, net)) { 549 550 if (skb->protocol == htons(ETH_P_IP)) 551 nf_bridge->frag_max_size = IPCB(skb)->frag_max_size; 552 553 if (skb->protocol == htons(ETH_P_IPV6)) 554 nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size; 555 556 in = nf_bridge->physindev; 557 if (nf_bridge->pkt_otherhost) { 558 skb->pkt_type = PACKET_OTHERHOST; 559 nf_bridge->pkt_otherhost = false; 560 } 561 nf_bridge_update_protocol(skb); 562 } else { 563 in = *((struct net_device **)(skb->cb)); 564 } 565 nf_bridge_push_encap_header(skb); 566 567 br_nf_hook_thresh(NF_BR_FORWARD, net, sk, skb, in, skb->dev, 568 br_forward_finish); 569 return 0; 570 } 571 572 573 /* This is the 'purely bridged' case. For IP, we pass the packet to 574 * netfilter with indev and outdev set to the bridge device, 575 * but we are still able to filter on the 'real' indev/outdev 576 * because of the physdev module. For ARP, indev and outdev are the 577 * bridge ports. */ 578 static unsigned int br_nf_forward_ip(void *priv, 579 struct sk_buff *skb, 580 const struct nf_hook_state *state) 581 { 582 struct nf_bridge_info *nf_bridge; 583 struct net_device *parent; 584 u_int8_t pf; 585 586 nf_bridge = nf_bridge_info_get(skb); 587 if (!nf_bridge) 588 return NF_ACCEPT; 589 590 /* Need exclusive nf_bridge_info since we might have multiple 591 * different physoutdevs. */ 592 if (!nf_bridge_unshare(skb)) 593 return NF_DROP; 594 595 nf_bridge = nf_bridge_info_get(skb); 596 if (!nf_bridge) 597 return NF_DROP; 598 599 parent = bridge_parent(state->out); 600 if (!parent) 601 return NF_DROP; 602 603 if (IS_IP(skb) || is_vlan_ip(skb, state->net) || 604 is_pppoe_ip(skb, state->net)) 605 pf = NFPROTO_IPV4; 606 else if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) || 607 is_pppoe_ipv6(skb, state->net)) 608 pf = NFPROTO_IPV6; 609 else 610 return NF_ACCEPT; 611 612 nf_bridge_pull_encap_header(skb); 613 614 if (skb->pkt_type == PACKET_OTHERHOST) { 615 skb->pkt_type = PACKET_HOST; 616 nf_bridge->pkt_otherhost = true; 617 } 618 619 if (pf == NFPROTO_IPV4) { 620 if (br_validate_ipv4(state->net, skb)) 621 return NF_DROP; 622 IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; 623 } 624 625 if (pf == NFPROTO_IPV6) { 626 if (br_validate_ipv6(state->net, skb)) 627 return NF_DROP; 628 IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; 629 } 630 631 nf_bridge->physoutdev = skb->dev; 632 if (pf == NFPROTO_IPV4) 633 skb->protocol = htons(ETH_P_IP); 634 else 635 skb->protocol = htons(ETH_P_IPV6); 636 637 NF_HOOK(pf, NF_INET_FORWARD, state->net, NULL, skb, 638 brnf_get_logical_dev(skb, state->in, state->net), 639 parent, br_nf_forward_finish); 640 641 return NF_STOLEN; 642 } 643 644 static unsigned int br_nf_forward_arp(void *priv, 645 struct sk_buff *skb, 646 const struct nf_hook_state *state) 647 { 648 struct net_bridge_port *p; 649 struct net_bridge *br; 650 struct net_device **d = (struct net_device **)(skb->cb); 651 struct brnf_net *brnet; 652 653 p = br_port_get_rcu(state->out); 654 if (p == NULL) 655 return NF_ACCEPT; 656 br = p->br; 657 658 brnet = net_generic(state->net, brnf_net_id); 659 if (!brnet->call_arptables && !br_opt_get(br, BROPT_NF_CALL_ARPTABLES)) 660 return NF_ACCEPT; 661 662 if (!IS_ARP(skb)) { 663 if (!is_vlan_arp(skb, state->net)) 664 return NF_ACCEPT; 665 nf_bridge_pull_encap_header(skb); 666 } 667 668 if (unlikely(!pskb_may_pull(skb, sizeof(struct arphdr)))) 669 return NF_DROP; 670 671 if (arp_hdr(skb)->ar_pln != 4) { 672 if (is_vlan_arp(skb, state->net)) 673 nf_bridge_push_encap_header(skb); 674 return NF_ACCEPT; 675 } 676 *d = state->in; 677 NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, state->net, state->sk, skb, 678 state->in, state->out, br_nf_forward_finish); 679 680 return NF_STOLEN; 681 } 682 683 static int br_nf_push_frag_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) 684 { 685 struct brnf_frag_data *data; 686 int err; 687 688 data = this_cpu_ptr(&brnf_frag_data_storage); 689 err = skb_cow_head(skb, data->size); 690 691 if (err) { 692 kfree_skb(skb); 693 return 0; 694 } 695 696 if (data->vlan_proto) 697 __vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci); 698 699 skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size); 700 __skb_push(skb, data->encap_size); 701 702 nf_bridge_info_free(skb); 703 return br_dev_queue_push_xmit(net, sk, skb); 704 } 705 706 static int 707 br_nf_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, 708 int (*output)(struct net *, struct sock *, struct sk_buff *)) 709 { 710 unsigned int mtu = ip_skb_dst_mtu(sk, skb); 711 struct iphdr *iph = ip_hdr(skb); 712 713 if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) || 714 (IPCB(skb)->frag_max_size && 715 IPCB(skb)->frag_max_size > mtu))) { 716 IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); 717 kfree_skb(skb); 718 return -EMSGSIZE; 719 } 720 721 return ip_do_fragment(net, sk, skb, output); 722 } 723 724 static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) 725 { 726 const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 727 728 if (nf_bridge->orig_proto == BRNF_PROTO_PPPOE) 729 return PPPOE_SES_HLEN; 730 return 0; 731 } 732 733 static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) 734 { 735 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 736 unsigned int mtu, mtu_reserved; 737 738 mtu_reserved = nf_bridge_mtu_reduction(skb); 739 mtu = skb->dev->mtu; 740 741 if (nf_bridge->pkt_otherhost) { 742 skb->pkt_type = PACKET_OTHERHOST; 743 nf_bridge->pkt_otherhost = false; 744 } 745 746 if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu) 747 mtu = nf_bridge->frag_max_size; 748 749 nf_bridge_update_protocol(skb); 750 nf_bridge_push_encap_header(skb); 751 752 if (skb_is_gso(skb) || skb->len + mtu_reserved <= mtu) { 753 nf_bridge_info_free(skb); 754 return br_dev_queue_push_xmit(net, sk, skb); 755 } 756 757 /* This is wrong! We should preserve the original fragment 758 * boundaries by preserving frag_list rather than refragmenting. 759 */ 760 if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) && 761 skb->protocol == htons(ETH_P_IP)) { 762 struct brnf_frag_data *data; 763 764 if (br_validate_ipv4(net, skb)) 765 goto drop; 766 767 IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; 768 769 data = this_cpu_ptr(&brnf_frag_data_storage); 770 771 if (skb_vlan_tag_present(skb)) { 772 data->vlan_tci = skb->vlan_tci; 773 data->vlan_proto = skb->vlan_proto; 774 } else { 775 data->vlan_proto = 0; 776 } 777 778 data->encap_size = nf_bridge_encap_header_len(skb); 779 data->size = ETH_HLEN + data->encap_size; 780 781 skb_copy_from_linear_data_offset(skb, -data->size, data->mac, 782 data->size); 783 784 return br_nf_ip_fragment(net, sk, skb, br_nf_push_frag_xmit); 785 } 786 if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) && 787 skb->protocol == htons(ETH_P_IPV6)) { 788 const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); 789 struct brnf_frag_data *data; 790 791 if (br_validate_ipv6(net, skb)) 792 goto drop; 793 794 IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; 795 796 data = this_cpu_ptr(&brnf_frag_data_storage); 797 data->encap_size = nf_bridge_encap_header_len(skb); 798 data->size = ETH_HLEN + data->encap_size; 799 800 skb_copy_from_linear_data_offset(skb, -data->size, data->mac, 801 data->size); 802 803 if (v6ops) 804 return v6ops->fragment(net, sk, skb, br_nf_push_frag_xmit); 805 806 kfree_skb(skb); 807 return -EMSGSIZE; 808 } 809 nf_bridge_info_free(skb); 810 return br_dev_queue_push_xmit(net, sk, skb); 811 drop: 812 kfree_skb(skb); 813 return 0; 814 } 815 816 /* PF_BRIDGE/POST_ROUTING ********************************************/ 817 static unsigned int br_nf_post_routing(void *priv, 818 struct sk_buff *skb, 819 const struct nf_hook_state *state) 820 { 821 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 822 struct net_device *realoutdev = bridge_parent(skb->dev); 823 u_int8_t pf; 824 825 /* if nf_bridge is set, but ->physoutdev is NULL, this packet came in 826 * on a bridge, but was delivered locally and is now being routed: 827 * 828 * POST_ROUTING was already invoked from the ip stack. 829 */ 830 if (!nf_bridge || !nf_bridge->physoutdev) 831 return NF_ACCEPT; 832 833 if (!realoutdev) 834 return NF_DROP; 835 836 if (IS_IP(skb) || is_vlan_ip(skb, state->net) || 837 is_pppoe_ip(skb, state->net)) 838 pf = NFPROTO_IPV4; 839 else if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) || 840 is_pppoe_ipv6(skb, state->net)) 841 pf = NFPROTO_IPV6; 842 else 843 return NF_ACCEPT; 844 845 if (skb->pkt_type == PACKET_OTHERHOST) { 846 skb->pkt_type = PACKET_HOST; 847 nf_bridge->pkt_otherhost = true; 848 } 849 850 nf_bridge_pull_encap_header(skb); 851 if (pf == NFPROTO_IPV4) 852 skb->protocol = htons(ETH_P_IP); 853 else 854 skb->protocol = htons(ETH_P_IPV6); 855 856 NF_HOOK(pf, NF_INET_POST_ROUTING, state->net, state->sk, skb, 857 NULL, realoutdev, 858 br_nf_dev_queue_xmit); 859 860 return NF_STOLEN; 861 } 862 863 /* IP/SABOTAGE *****************************************************/ 864 /* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING 865 * for the second time. */ 866 static unsigned int ip_sabotage_in(void *priv, 867 struct sk_buff *skb, 868 const struct nf_hook_state *state) 869 { 870 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 871 872 if (nf_bridge) { 873 if (nf_bridge->sabotage_in_done) 874 return NF_ACCEPT; 875 876 if (!nf_bridge->in_prerouting && 877 !netif_is_l3_master(skb->dev) && 878 !netif_is_l3_slave(skb->dev)) { 879 nf_bridge->sabotage_in_done = 1; 880 state->okfn(state->net, state->sk, skb); 881 return NF_STOLEN; 882 } 883 } 884 885 return NF_ACCEPT; 886 } 887 888 /* This is called when br_netfilter has called into iptables/netfilter, 889 * and DNAT has taken place on a bridge-forwarded packet. 890 * 891 * neigh->output has created a new MAC header, with local br0 MAC 892 * as saddr. 893 * 894 * This restores the original MAC saddr of the bridged packet 895 * before invoking bridge forward logic to transmit the packet. 896 */ 897 static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) 898 { 899 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 900 901 skb_pull(skb, ETH_HLEN); 902 nf_bridge->bridged_dnat = 0; 903 904 BUILD_BUG_ON(sizeof(nf_bridge->neigh_header) != (ETH_HLEN - ETH_ALEN)); 905 906 skb_copy_to_linear_data_offset(skb, -(ETH_HLEN - ETH_ALEN), 907 nf_bridge->neigh_header, 908 ETH_HLEN - ETH_ALEN); 909 skb->dev = nf_bridge->physindev; 910 911 nf_bridge->physoutdev = NULL; 912 br_handle_frame_finish(dev_net(skb->dev), NULL, skb); 913 } 914 915 static int br_nf_dev_xmit(struct sk_buff *skb) 916 { 917 const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 918 919 if (nf_bridge && nf_bridge->bridged_dnat) { 920 br_nf_pre_routing_finish_bridge_slow(skb); 921 return 1; 922 } 923 return 0; 924 } 925 926 static const struct nf_br_ops br_ops = { 927 .br_dev_xmit_hook = br_nf_dev_xmit, 928 }; 929 930 /* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because 931 * br_dev_queue_push_xmit is called afterwards */ 932 static const struct nf_hook_ops br_nf_ops[] = { 933 { 934 .hook = br_nf_pre_routing, 935 .pf = NFPROTO_BRIDGE, 936 .hooknum = NF_BR_PRE_ROUTING, 937 .priority = NF_BR_PRI_BRNF, 938 }, 939 { 940 .hook = br_nf_forward_ip, 941 .pf = NFPROTO_BRIDGE, 942 .hooknum = NF_BR_FORWARD, 943 .priority = NF_BR_PRI_BRNF - 1, 944 }, 945 { 946 .hook = br_nf_forward_arp, 947 .pf = NFPROTO_BRIDGE, 948 .hooknum = NF_BR_FORWARD, 949 .priority = NF_BR_PRI_BRNF, 950 }, 951 { 952 .hook = br_nf_post_routing, 953 .pf = NFPROTO_BRIDGE, 954 .hooknum = NF_BR_POST_ROUTING, 955 .priority = NF_BR_PRI_LAST, 956 }, 957 { 958 .hook = ip_sabotage_in, 959 .pf = NFPROTO_IPV4, 960 .hooknum = NF_INET_PRE_ROUTING, 961 .priority = NF_IP_PRI_FIRST, 962 }, 963 { 964 .hook = ip_sabotage_in, 965 .pf = NFPROTO_IPV6, 966 .hooknum = NF_INET_PRE_ROUTING, 967 .priority = NF_IP6_PRI_FIRST, 968 }, 969 }; 970 971 static int brnf_device_event(struct notifier_block *unused, unsigned long event, 972 void *ptr) 973 { 974 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 975 struct brnf_net *brnet; 976 struct net *net; 977 int ret; 978 979 if (event != NETDEV_REGISTER || !netif_is_bridge_master(dev)) 980 return NOTIFY_DONE; 981 982 ASSERT_RTNL(); 983 984 net = dev_net(dev); 985 brnet = net_generic(net, brnf_net_id); 986 if (brnet->enabled) 987 return NOTIFY_OK; 988 989 ret = nf_register_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops)); 990 if (ret) 991 return NOTIFY_BAD; 992 993 brnet->enabled = true; 994 return NOTIFY_OK; 995 } 996 997 static struct notifier_block brnf_notifier __read_mostly = { 998 .notifier_call = brnf_device_event, 999 }; 1000 1001 /* recursively invokes nf_hook_slow (again), skipping already-called 1002 * hooks (< NF_BR_PRI_BRNF). 1003 * 1004 * Called with rcu read lock held. 1005 */ 1006 int br_nf_hook_thresh(unsigned int hook, struct net *net, 1007 struct sock *sk, struct sk_buff *skb, 1008 struct net_device *indev, 1009 struct net_device *outdev, 1010 int (*okfn)(struct net *, struct sock *, 1011 struct sk_buff *)) 1012 { 1013 const struct nf_hook_entries *e; 1014 struct nf_hook_state state; 1015 struct nf_hook_ops **ops; 1016 unsigned int i; 1017 int ret; 1018 1019 e = rcu_dereference(net->nf.hooks_bridge[hook]); 1020 if (!e) 1021 return okfn(net, sk, skb); 1022 1023 ops = nf_hook_entries_get_hook_ops(e); 1024 for (i = 0; i < e->num_hook_entries; i++) { 1025 /* These hooks have already been called */ 1026 if (ops[i]->priority < NF_BR_PRI_BRNF) 1027 continue; 1028 1029 /* These hooks have not been called yet, run them. */ 1030 if (ops[i]->priority > NF_BR_PRI_BRNF) 1031 break; 1032 1033 /* take a closer look at NF_BR_PRI_BRNF. */ 1034 if (ops[i]->hook == br_nf_pre_routing) { 1035 /* This hook diverted the skb to this function, 1036 * hooks after this have not been run yet. 1037 */ 1038 i++; 1039 break; 1040 } 1041 } 1042 1043 nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev, 1044 sk, net, okfn); 1045 1046 ret = nf_hook_slow(skb, &state, e, i); 1047 if (ret == 1) 1048 ret = okfn(net, sk, skb); 1049 1050 return ret; 1051 } 1052 1053 #ifdef CONFIG_SYSCTL 1054 static 1055 int brnf_sysctl_call_tables(struct ctl_table *ctl, int write, 1056 void *buffer, size_t *lenp, loff_t *ppos) 1057 { 1058 int ret; 1059 1060 ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 1061 1062 if (write && *(int *)(ctl->data)) 1063 *(int *)(ctl->data) = 1; 1064 return ret; 1065 } 1066 1067 static struct ctl_table brnf_table[] = { 1068 { 1069 .procname = "bridge-nf-call-arptables", 1070 .maxlen = sizeof(int), 1071 .mode = 0644, 1072 .proc_handler = brnf_sysctl_call_tables, 1073 }, 1074 { 1075 .procname = "bridge-nf-call-iptables", 1076 .maxlen = sizeof(int), 1077 .mode = 0644, 1078 .proc_handler = brnf_sysctl_call_tables, 1079 }, 1080 { 1081 .procname = "bridge-nf-call-ip6tables", 1082 .maxlen = sizeof(int), 1083 .mode = 0644, 1084 .proc_handler = brnf_sysctl_call_tables, 1085 }, 1086 { 1087 .procname = "bridge-nf-filter-vlan-tagged", 1088 .maxlen = sizeof(int), 1089 .mode = 0644, 1090 .proc_handler = brnf_sysctl_call_tables, 1091 }, 1092 { 1093 .procname = "bridge-nf-filter-pppoe-tagged", 1094 .maxlen = sizeof(int), 1095 .mode = 0644, 1096 .proc_handler = brnf_sysctl_call_tables, 1097 }, 1098 { 1099 .procname = "bridge-nf-pass-vlan-input-dev", 1100 .maxlen = sizeof(int), 1101 .mode = 0644, 1102 .proc_handler = brnf_sysctl_call_tables, 1103 }, 1104 { } 1105 }; 1106 1107 static inline void br_netfilter_sysctl_default(struct brnf_net *brnf) 1108 { 1109 brnf->call_iptables = 1; 1110 brnf->call_ip6tables = 1; 1111 brnf->call_arptables = 1; 1112 brnf->filter_vlan_tagged = 0; 1113 brnf->filter_pppoe_tagged = 0; 1114 brnf->pass_vlan_indev = 0; 1115 } 1116 1117 static int br_netfilter_sysctl_init_net(struct net *net) 1118 { 1119 struct ctl_table *table = brnf_table; 1120 struct brnf_net *brnet; 1121 1122 if (!net_eq(net, &init_net)) { 1123 table = kmemdup(table, sizeof(brnf_table), GFP_KERNEL); 1124 if (!table) 1125 return -ENOMEM; 1126 } 1127 1128 brnet = net_generic(net, brnf_net_id); 1129 table[0].data = &brnet->call_arptables; 1130 table[1].data = &brnet->call_iptables; 1131 table[2].data = &brnet->call_ip6tables; 1132 table[3].data = &brnet->filter_vlan_tagged; 1133 table[4].data = &brnet->filter_pppoe_tagged; 1134 table[5].data = &brnet->pass_vlan_indev; 1135 1136 br_netfilter_sysctl_default(brnet); 1137 1138 brnet->ctl_hdr = register_net_sysctl_sz(net, "net/bridge", table, 1139 ARRAY_SIZE(brnf_table)); 1140 if (!brnet->ctl_hdr) { 1141 if (!net_eq(net, &init_net)) 1142 kfree(table); 1143 1144 return -ENOMEM; 1145 } 1146 1147 return 0; 1148 } 1149 1150 static void br_netfilter_sysctl_exit_net(struct net *net, 1151 struct brnf_net *brnet) 1152 { 1153 struct ctl_table *table = brnet->ctl_hdr->ctl_table_arg; 1154 1155 unregister_net_sysctl_table(brnet->ctl_hdr); 1156 if (!net_eq(net, &init_net)) 1157 kfree(table); 1158 } 1159 1160 static int __net_init brnf_init_net(struct net *net) 1161 { 1162 return br_netfilter_sysctl_init_net(net); 1163 } 1164 #endif 1165 1166 static void __net_exit brnf_exit_net(struct net *net) 1167 { 1168 struct brnf_net *brnet; 1169 1170 brnet = net_generic(net, brnf_net_id); 1171 if (brnet->enabled) { 1172 nf_unregister_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops)); 1173 brnet->enabled = false; 1174 } 1175 1176 #ifdef CONFIG_SYSCTL 1177 br_netfilter_sysctl_exit_net(net, brnet); 1178 #endif 1179 } 1180 1181 static struct pernet_operations brnf_net_ops __read_mostly = { 1182 #ifdef CONFIG_SYSCTL 1183 .init = brnf_init_net, 1184 #endif 1185 .exit = brnf_exit_net, 1186 .id = &brnf_net_id, 1187 .size = sizeof(struct brnf_net), 1188 }; 1189 1190 static int __init br_netfilter_init(void) 1191 { 1192 int ret; 1193 1194 ret = register_pernet_subsys(&brnf_net_ops); 1195 if (ret < 0) 1196 return ret; 1197 1198 ret = register_netdevice_notifier(&brnf_notifier); 1199 if (ret < 0) { 1200 unregister_pernet_subsys(&brnf_net_ops); 1201 return ret; 1202 } 1203 1204 RCU_INIT_POINTER(nf_br_ops, &br_ops); 1205 printk(KERN_NOTICE "Bridge firewalling registered\n"); 1206 return 0; 1207 } 1208 1209 static void __exit br_netfilter_fini(void) 1210 { 1211 RCU_INIT_POINTER(nf_br_ops, NULL); 1212 unregister_netdevice_notifier(&brnf_notifier); 1213 unregister_pernet_subsys(&brnf_net_ops); 1214 } 1215 1216 module_init(br_netfilter_init); 1217 module_exit(br_netfilter_fini); 1218 1219 MODULE_LICENSE("GPL"); 1220 MODULE_AUTHOR("Lennert Buytenhek <buytenh@gnu.org>"); 1221 MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>"); 1222 MODULE_DESCRIPTION("Linux ethernet netfilter firewall bridge"); 1223