/*
 *	Handle firewalling
 *	Linux ethernet bridge
 *
 *	Authors:
 *	Lennert Buytenhek		<buytenh@gnu.org>
 *	Bart De Schuymer		<bdschuym@pandora.be>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Lennert dedicates this file to Kerstin Wurdinger.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/ip.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/if_arp.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/if_pppox.h>
#include <linux/ppp_defs.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_arp.h>
#include <linux/in_route.h>
#include <linux/rculist.h>
#include <linux/inetdevice.h>

#include <net/ip.h>
#include <net/ipv6.h>
#include <net/addrconf.h>
#include <net/route.h>
#include <net/netfilter/br_netfilter.h>
#include <net/netns/generic.h>

#include <linux/uaccess.h>
#include "br_private.h"
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

static unsigned int brnf_net_id __read_mostly;

/* Per network namespace state: remembers whether the br_nf_ops hooks
 * have already been registered in this netns (done lazily on first
 * bridge creation, see brnf_device_event()).
 */
struct brnf_net {
	bool enabled;
};

#ifdef CONFIG_SYSCTL
static struct ctl_table_header *brnf_sysctl_header;
static int brnf_call_iptables __read_mostly = 1;
static int brnf_call_ip6tables __read_mostly = 1;
static int brnf_call_arptables __read_mostly = 1;
static int brnf_filter_vlan_tagged __read_mostly;
static int brnf_filter_pppoe_tagged __read_mostly;
static int brnf_pass_vlan_indev __read_mostly;
#else
/* Without sysctl support the call-*tables switches are hardwired on and
 * the vlan/pppoe filtering extras are hardwired off.
 */
#define brnf_call_iptables 1
#define brnf_call_ip6tables 1
#define brnf_call_arptables 1
#define brnf_filter_vlan_tagged 0
#define brnf_filter_pppoe_tagged 0
#define brnf_pass_vlan_indev 0
#endif

/* Plain (untagged) protocol classification of a bridged frame. */
#define IS_IP(skb) \
	(!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IP))

#define IS_IPV6(skb) \
	(!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IPV6))

#define IS_ARP(skb) \
	(!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_ARP))

/* Return the protocol encapsulated in a VLAN frame: either the skb's own
 * protocol when the tag is carried out-of-band in the skb, or the
 * encapsulated protocol of an in-band 802.1Q header.  0 if not VLAN.
 */
static inline __be16 vlan_proto(const struct sk_buff *skb)
{
	if (skb_vlan_tag_present(skb))
		return skb->protocol;
	else if (skb->protocol == htons(ETH_P_8021Q))
		return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
	else
		return 0;
}

#define IS_VLAN_IP(skb) \
	(vlan_proto(skb) == htons(ETH_P_IP) && \
	 brnf_filter_vlan_tagged)

#define IS_VLAN_IPV6(skb) \
	(vlan_proto(skb) == htons(ETH_P_IPV6) && \
	 brnf_filter_vlan_tagged)

#define IS_VLAN_ARP(skb) \
	(vlan_proto(skb) == htons(ETH_P_ARP) && \
	 brnf_filter_vlan_tagged)

/* PPP protocol field following the PPPoE session header.
 * NOTE(review): dereferences mac_header + ETH_HLEN + sizeof(pppoe_hdr)
 * without a pskb_may_pull() — this assumes the PPPoE header is already
 * linear in the skb; confirm for short frames.
 */
static inline __be16 pppoe_proto(const struct sk_buff *skb)
{
	return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
			    sizeof(struct pppoe_hdr)));
}

#define IS_PPPOE_IP(skb) \
	(skb->protocol == htons(ETH_P_PPP_SES) && \
	 pppoe_proto(skb) == htons(PPP_IP) && \
	 brnf_filter_pppoe_tagged)

#define IS_PPPOE_IPV6(skb) \
	(skb->protocol == htons(ETH_P_PPP_SES) && \
	 pppoe_proto(skb) == htons(PPP_IPV6) && \
	 brnf_filter_pppoe_tagged)

/* largest possible L2 header, see br_nf_dev_queue_xmit() */
#define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN)

/* Per-cpu scratch area used to carry the saved MAC header (and VLAN tag)
 * across IP fragmentation; filled in br_nf_dev_queue_xmit() and consumed
 * by br_nf_push_frag_xmit() for each fragment.
 */
struct brnf_frag_data {
	char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH];
	u8 encap_size;
	u8 size;
	u16 vlan_tci;
	__be16 vlan_proto;
};

static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage);

/* Release the skb's reference on its nf_bridge info, if it has one. */
static void nf_bridge_info_free(struct sk_buff *skb)
{
	if (skb->nf_bridge) {
		nf_bridge_put(skb->nf_bridge);
		skb->nf_bridge = NULL;
	}
}

/* Map a bridge port device to its bridge master device, or NULL when
 * @dev is not a bridge port.  Relies on RCU for the port lookup.
 */
static inline struct net_device *bridge_parent(const struct net_device *dev)
{
	struct net_bridge_port *port;

	port = br_port_get_rcu(dev);
	return port ? port->br->dev : NULL;
}

/* Give the skb exclusive ownership of its nf_bridge info, copying it if
 * the refcount shows it is shared.  Returns the (possibly new) info, or
 * NULL if allocation of the private copy failed.
 */
static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb)
{
	struct nf_bridge_info *nf_bridge = skb->nf_bridge;

	if (refcount_read(&nf_bridge->use) > 1) {
		struct nf_bridge_info *tmp = nf_bridge_alloc(skb);

		if (tmp) {
			memcpy(tmp, nf_bridge, sizeof(struct nf_bridge_info));
			refcount_set(&tmp->use, 1);
		}
		nf_bridge_put(nf_bridge);
		nf_bridge = tmp;
	}
	return nf_bridge;
}

/* Length of the encapsulation header (802.1Q or PPPoE session) sitting
 * between the Ethernet header and the IP header, 0 for plain frames.
 */
unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
{
	switch (skb->protocol) {
	case __cpu_to_be16(ETH_P_8021Q):
		return VLAN_HLEN;
	case __cpu_to_be16(ETH_P_PPP_SES):
		return PPPOE_SES_HLEN;
	default:
		return 0;
	}
}

/* Strip the encap header and keep network_header pointing at the IP header. */
static inline void nf_bridge_pull_encap_header(struct sk_buff *skb)
{
	unsigned int len = nf_bridge_encap_header_len(skb);

	skb_pull(skb, len);
	skb->network_header += len;
}

/* As above, but also folds the pulled bytes out of a CHECKSUM_COMPLETE sum. */
static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb)
{
	unsigned int len = nf_bridge_encap_header_len(skb);

	skb_pull_rcsum(skb, len);
	skb->network_header += len;
}

/* When handing a packet over to the IP layer
 * check whether we have a skb that is in the
 * expected format
 */

static int br_validate_ipv4(struct net *net, struct sk_buff *skb)
{
	const struct iphdr *iph;
	u32 len;

	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
		goto inhdr_error;

	iph = ip_hdr(skb);

	/* Basic sanity checks */
	if (iph->ihl < 5 || iph->version != 4)
		goto inhdr_error;

	/* reload iph: pskb_may_pull() may have reallocated the head */
	if (!pskb_may_pull(skb, iph->ihl*4))
		goto inhdr_error;

	iph = ip_hdr(skb);
	if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
		goto inhdr_error;

	len = ntohs(iph->tot_len);
	if (skb->len < len) {
		__IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS);
		goto drop;
	} else if (len < (iph->ihl*4))
		goto inhdr_error;

	/* drop any padding beyond the IP total length */
	if (pskb_trim_rcsum(skb, len)) {
		__IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
	/* We should really parse IP options here but until
	 * somebody who actually uses IP options complains to
	 * us we'll just silently ignore the options because
	 * we're lazy!
	 */
	return 0;

inhdr_error:
	__IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
drop:
	return -1;
}

/* Restore skb->protocol to the on-wire encapsulation recorded when the
 * frame entered PRE_ROUTING (8021Q/PPPoE), undoing the temporary switch
 * to ETH_P_IP/ETH_P_IPV6 done for the IP netfilter hooks.
 */
void nf_bridge_update_protocol(struct sk_buff *skb)
{
	switch (skb->nf_bridge->orig_proto) {
	case BRNF_PROTO_8021Q:
		skb->protocol = htons(ETH_P_8021Q);
		break;
	case BRNF_PROTO_PPPOE:
		skb->protocol = htons(ETH_P_PPP_SES);
		break;
	case BRNF_PROTO_UNCHANGED:
		break;
	}
}

/* Obtain the correct destination MAC address, while preserving the original
 * source MAC address. If we already know this address, we just copy it. If we
 * don't, we use the neighbour framework to find out. In both cases, we make
 * sure that br_handle_frame_finish() is called afterwards.
 */
int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct neighbour *neigh;
	struct dst_entry *dst;

	skb->dev = bridge_parent(skb->dev);
	if (!skb->dev)
		goto free_skb;
	dst = skb_dst(skb);
	neigh = dst_neigh_lookup_skb(dst, skb);
	if (neigh) {
		struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
		int ret;

		if (neigh->hh.hh_len) {
			/* cached hardware header: fill it in directly */
			neigh_hh_bridge(&neigh->hh, skb);
			skb->dev = nf_bridge->physindev;
			ret = br_handle_frame_finish(net, sk, skb);
		} else {
			/* the neighbour function below overwrites the complete
			 * MAC header, so we save the Ethernet source address and
			 * protocol number.
			 */
			skb_copy_from_linear_data_offset(skb,
							 -(ETH_HLEN-ETH_ALEN),
							 nf_bridge->neigh_header,
							 ETH_HLEN-ETH_ALEN);
			/* tell br_dev_xmit to continue with forwarding */
			nf_bridge->bridged_dnat = 1;
			/* FIXME Need to refragment */
			ret = neigh->output(neigh, skb);
		}
		neigh_release(neigh);
		return ret;
	}
free_skb:
	kfree_skb(skb);
	return 0;
}

/* True when netfilter (DNAT) rewrote the destination address after we
 * stashed it in setup_pre_routing()/br_nf_pre_routing().
 */
static inline bool
br_nf_ipv4_daddr_was_changed(const struct sk_buff *skb,
			     const struct nf_bridge_info *nf_bridge)
{
	return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr;
}

/* This requires some explaining. If DNAT has taken place,
 * we will need to fix up the destination Ethernet address.
 * This is also true when SNAT takes place (for the reply direction).
 *
 * There are two cases to consider:
 * 1. The packet was DNAT'ed to a device in the same bridge
 *    port group as it was received on. We can still bridge
 *    the packet.
 * 2. The packet was DNAT'ed to a different device, either
 *    a non-bridged device or another bridge port group.
 *    The packet will need to be routed.
 *
 * The correct way of distinguishing between these two cases is to
 * call ip_route_input() and to look at skb->dst->dev, which is
 * changed to the destination device if ip_route_input() succeeds.
 *
 * Let's first consider the case that ip_route_input() succeeds:
 *
 * If the output device equals the logical bridge device the packet
 * came in on, we can consider this bridging. The corresponding MAC
 * address will be obtained in br_nf_pre_routing_finish_bridge.
 * Otherwise, the packet is considered to be routed and we just
 * change the destination MAC address so that the packet will
 * later be passed up to the IP stack to be routed. For a redirected
 * packet, ip_route_input() will give back the localhost as output device,
 * which differs from the bridge device.
337 * 338 * Let's now consider the case that ip_route_input() fails: 339 * 340 * This can be because the destination address is martian, in which case 341 * the packet will be dropped. 342 * If IP forwarding is disabled, ip_route_input() will fail, while 343 * ip_route_output_key() can return success. The source 344 * address for ip_route_output_key() is set to zero, so ip_route_output_key() 345 * thinks we're handling a locally generated packet and won't care 346 * if IP forwarding is enabled. If the output device equals the logical bridge 347 * device, we proceed as if ip_route_input() succeeded. If it differs from the 348 * logical bridge port or if ip_route_output_key() fails we drop the packet. 349 */ 350 static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 351 { 352 struct net_device *dev = skb->dev; 353 struct iphdr *iph = ip_hdr(skb); 354 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 355 struct rtable *rt; 356 int err; 357 358 nf_bridge->frag_max_size = IPCB(skb)->frag_max_size; 359 360 if (nf_bridge->pkt_otherhost) { 361 skb->pkt_type = PACKET_OTHERHOST; 362 nf_bridge->pkt_otherhost = false; 363 } 364 nf_bridge->in_prerouting = 0; 365 if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) { 366 if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { 367 struct in_device *in_dev = __in_dev_get_rcu(dev); 368 369 /* If err equals -EHOSTUNREACH the error is due to a 370 * martian destination or due to the fact that 371 * forwarding is disabled. For most martian packets, 372 * ip_route_output_key() will fail. It won't fail for 2 types of 373 * martian destinations: loopback destinations and destination 374 * 0.0.0.0. In both cases the packet will be dropped because the 375 * destination is the loopback device and not the bridge. 
*/ 376 if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev)) 377 goto free_skb; 378 379 rt = ip_route_output(net, iph->daddr, 0, 380 RT_TOS(iph->tos), 0); 381 if (!IS_ERR(rt)) { 382 /* - Bridged-and-DNAT'ed traffic doesn't 383 * require ip_forwarding. */ 384 if (rt->dst.dev == dev) { 385 skb_dst_set(skb, &rt->dst); 386 goto bridged_dnat; 387 } 388 ip_rt_put(rt); 389 } 390 free_skb: 391 kfree_skb(skb); 392 return 0; 393 } else { 394 if (skb_dst(skb)->dev == dev) { 395 bridged_dnat: 396 skb->dev = nf_bridge->physindev; 397 nf_bridge_update_protocol(skb); 398 nf_bridge_push_encap_header(skb); 399 br_nf_hook_thresh(NF_BR_PRE_ROUTING, 400 net, sk, skb, skb->dev, 401 NULL, 402 br_nf_pre_routing_finish_bridge); 403 return 0; 404 } 405 ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); 406 skb->pkt_type = PACKET_HOST; 407 } 408 } else { 409 rt = bridge_parent_rtable(nf_bridge->physindev); 410 if (!rt) { 411 kfree_skb(skb); 412 return 0; 413 } 414 skb_dst_set_noref(skb, &rt->dst); 415 } 416 417 skb->dev = nf_bridge->physindev; 418 nf_bridge_update_protocol(skb); 419 nf_bridge_push_encap_header(skb); 420 br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL, 421 br_handle_frame_finish); 422 return 0; 423 } 424 425 static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct net_device *dev) 426 { 427 struct net_device *vlan, *br; 428 429 br = bridge_parent(dev); 430 if (brnf_pass_vlan_indev == 0 || !skb_vlan_tag_present(skb)) 431 return br; 432 433 vlan = __vlan_find_dev_deep_rcu(br, skb->vlan_proto, 434 skb_vlan_tag_get(skb) & VLAN_VID_MASK); 435 436 return vlan ? 
vlan : br; 437 } 438 439 /* Some common code for IPv4/IPv6 */ 440 struct net_device *setup_pre_routing(struct sk_buff *skb) 441 { 442 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 443 444 if (skb->pkt_type == PACKET_OTHERHOST) { 445 skb->pkt_type = PACKET_HOST; 446 nf_bridge->pkt_otherhost = true; 447 } 448 449 nf_bridge->in_prerouting = 1; 450 nf_bridge->physindev = skb->dev; 451 skb->dev = brnf_get_logical_dev(skb, skb->dev); 452 453 if (skb->protocol == htons(ETH_P_8021Q)) 454 nf_bridge->orig_proto = BRNF_PROTO_8021Q; 455 else if (skb->protocol == htons(ETH_P_PPP_SES)) 456 nf_bridge->orig_proto = BRNF_PROTO_PPPOE; 457 458 /* Must drop socket now because of tproxy. */ 459 skb_orphan(skb); 460 return skb->dev; 461 } 462 463 /* Direct IPv6 traffic to br_nf_pre_routing_ipv6. 464 * Replicate the checks that IPv4 does on packet reception. 465 * Set skb->dev to the bridge device (i.e. parent of the 466 * receiving device) to make netfilter happy, the REDIRECT 467 * target in particular. Save the original destination IP 468 * address to be able to detect DNAT afterwards. 
 */
static unsigned int br_nf_pre_routing(void *priv,
				      struct sk_buff *skb,
				      const struct nf_hook_state *state)
{
	struct nf_bridge_info *nf_bridge;
	struct net_bridge_port *p;
	struct net_bridge *br;
	__u32 len = nf_bridge_encap_header_len(skb);

	if (unlikely(!pskb_may_pull(skb, len)))
		return NF_DROP;

	p = br_port_get_rcu(state->in);
	if (p == NULL)
		return NF_DROP;
	br = p->br;

	if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb)) {
		/* both the global sysctl and the per-bridge switch are off */
		if (!brnf_call_ip6tables && !br->nf_call_ip6tables)
			return NF_ACCEPT;

		nf_bridge_pull_encap_header_rcsum(skb);
		return br_nf_pre_routing_ipv6(priv, skb, state);
	}

	if (!brnf_call_iptables && !br->nf_call_iptables)
		return NF_ACCEPT;

	if (!IS_IP(skb) && !IS_VLAN_IP(skb) && !IS_PPPOE_IP(skb))
		return NF_ACCEPT;

	nf_bridge_pull_encap_header_rcsum(skb);

	if (br_validate_ipv4(state->net, skb))
		return NF_DROP;

	/* replace any stale nf_bridge info with a fresh one */
	nf_bridge_put(skb->nf_bridge);
	if (!nf_bridge_alloc(skb))
		return NF_DROP;
	if (!setup_pre_routing(skb))
		return NF_DROP;

	/* stash daddr so DNAT can be detected in the finish function */
	nf_bridge = nf_bridge_info_get(skb);
	nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr;

	skb->protocol = htons(ETH_P_IP);

	NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->net, state->sk, skb,
		skb->dev, NULL,
		br_nf_pre_routing_finish);

	return NF_STOLEN;
}


/* PF_BRIDGE/FORWARD *************************************************/
static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
	struct net_device *in;

	if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) {

		if (skb->protocol == htons(ETH_P_IP))
			nf_bridge->frag_max_size = IPCB(skb)->frag_max_size;

		if (skb->protocol == htons(ETH_P_IPV6))
			nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size;

		in = nf_bridge->physindev;
		if (nf_bridge->pkt_otherhost) {
			skb->pkt_type = PACKET_OTHERHOST;
			nf_bridge->pkt_otherhost = false;
		}
		nf_bridge_update_protocol(skb);
	} else {
		/* ARP path: indev was stashed in skb->cb by br_nf_forward_arp() */
		in = *((struct net_device **)(skb->cb));
	}
	nf_bridge_push_encap_header(skb);

	br_nf_hook_thresh(NF_BR_FORWARD, net, sk, skb, in, skb->dev,
			  br_forward_finish);
	return 0;
}


/* This is the 'purely bridged' case.  For IP, we pass the packet to
 * netfilter with indev and outdev set to the bridge device,
 * but we are still able to filter on the 'real' indev/outdev
 * because of the physdev module. For ARP, indev and outdev are the
 * bridge ports.
 */
static unsigned int br_nf_forward_ip(void *priv,
				     struct sk_buff *skb,
				     const struct nf_hook_state *state)
{
	struct nf_bridge_info *nf_bridge;
	struct net_device *parent;
	u_int8_t pf;

	if (!skb->nf_bridge)
		return NF_ACCEPT;

	/* Need exclusive nf_bridge_info since we might have multiple
	 * different physoutdevs.
	 */
	if (!nf_bridge_unshare(skb))
		return NF_DROP;

	nf_bridge = nf_bridge_info_get(skb);
	if (!nf_bridge)
		return NF_DROP;

	parent = bridge_parent(state->out);
	if (!parent)
		return NF_DROP;

	if (IS_IP(skb) || IS_VLAN_IP(skb) || IS_PPPOE_IP(skb))
		pf = NFPROTO_IPV4;
	else if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb))
		pf = NFPROTO_IPV6;
	else
		return NF_ACCEPT;

	nf_bridge_pull_encap_header(skb);

	if (skb->pkt_type == PACKET_OTHERHOST) {
		skb->pkt_type = PACKET_HOST;
		nf_bridge->pkt_otherhost = true;
	}

	if (pf == NFPROTO_IPV4) {
		if (br_validate_ipv4(state->net, skb))
			return NF_DROP;
		IPCB(skb)->frag_max_size = nf_bridge->frag_max_size;
	}

	if (pf == NFPROTO_IPV6) {
		if (br_validate_ipv6(state->net, skb))
			return NF_DROP;
		IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size;
	}

	nf_bridge->physoutdev = skb->dev;
	if (pf == NFPROTO_IPV4)
		skb->protocol = htons(ETH_P_IP);
	else
		skb->protocol = htons(ETH_P_IPV6);

	NF_HOOK(pf, NF_INET_FORWARD, state->net, NULL, skb,
		brnf_get_logical_dev(skb, state->in),
		parent,	br_nf_forward_finish);

	return NF_STOLEN;
}

static unsigned int br_nf_forward_arp(void *priv,
				      struct sk_buff *skb,
				      const struct nf_hook_state *state)
{
	struct net_bridge_port *p;
	struct net_bridge *br;
	struct net_device **d = (struct net_device **)(skb->cb);

	p = br_port_get_rcu(state->out);
	if (p == NULL)
		return NF_ACCEPT;
	br = p->br;

	if (!brnf_call_arptables && !br->nf_call_arptables)
		return NF_ACCEPT;

	if (!IS_ARP(skb)) {
		if (!IS_VLAN_ARP(skb))
			return NF_ACCEPT;
		nf_bridge_pull_encap_header(skb);
	}

	/* only IPv4 ARP goes through arptables; re-push encap and pass on */
	if (arp_hdr(skb)->ar_pln != 4) {
		if (IS_VLAN_ARP(skb))
			nf_bridge_push_encap_header(skb);
		return NF_ACCEPT;
	}
	/* remember indev for br_nf_forward_finish(); cb is free here */
	*d = state->in;
	NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, state->net, state->sk, skb,
		state->in, state->out, br_nf_forward_finish);

	return NF_STOLEN;
}

/* Output handler for each IP fragment: restore the saved MAC/encap
 * header (and VLAN tag) in front of the fragment and hand it to the
 * bridge transmit path.
 */
static int br_nf_push_frag_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct brnf_frag_data *data;
	int err;

	data = this_cpu_ptr(&brnf_frag_data_storage);
	err = skb_cow_head(skb, data->size);

	if (err) {
		kfree_skb(skb);
		return 0;
	}

	if (data->vlan_tci) {
		skb->vlan_tci = data->vlan_tci;
		skb->vlan_proto = data->vlan_proto;
	}

	skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size);
	__skb_push(skb, data->encap_size);

	nf_bridge_info_free(skb);
	return br_dev_queue_push_xmit(net, sk, skb);
}

/* Fragment an IPv4 skb, refusing when DF is set or the recorded
 * frag_max_size already exceeds the path MTU.
 */
static int
br_nf_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		  int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	unsigned int mtu = ip_skb_dst_mtu(sk, skb);
	struct iphdr *iph = ip_hdr(skb);

	if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) ||
		     (IPCB(skb)->frag_max_size &&
		      IPCB(skb)->frag_max_size > mtu))) {
		IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	return ip_do_fragment(net, sk, skb, output);
}

/* Extra L2 bytes (PPPoE session header) to subtract from the usable MTU. */
static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
{
	if (skb->nf_bridge->orig_proto == BRNF_PROTO_PPPOE)
		return PPPOE_SES_HLEN;
	return 0;
}

static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
	unsigned int mtu, mtu_reserved;

	mtu_reserved = nf_bridge_mtu_reduction(skb);
	mtu = skb->dev->mtu;

	if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu)
		mtu = nf_bridge->frag_max_size;

	/* fits (or is GSO and will be segmented later): transmit directly */
	if (skb_is_gso(skb) || skb->len + mtu_reserved <= mtu) {
		nf_bridge_info_free(skb);
		return br_dev_queue_push_xmit(net, sk, skb);
	}

	/* This is wrong! We should preserve the original fragment
	 * boundaries by preserving frag_list rather than refragmenting.
	 */
	if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) &&
	    skb->protocol == htons(ETH_P_IP)) {
		struct brnf_frag_data *data;

		if (br_validate_ipv4(net, skb))
			goto drop;

		IPCB(skb)->frag_max_size = nf_bridge->frag_max_size;

		nf_bridge_update_protocol(skb);

		/* save MAC header + VLAN tag for br_nf_push_frag_xmit() */
		data = this_cpu_ptr(&brnf_frag_data_storage);

		data->vlan_tci = skb->vlan_tci;
		data->vlan_proto = skb->vlan_proto;
		data->encap_size = nf_bridge_encap_header_len(skb);
		data->size = ETH_HLEN + data->encap_size;

		skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
						 data->size);

		return br_nf_ip_fragment(net, sk, skb, br_nf_push_frag_xmit);
	}
	if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) &&
	    skb->protocol == htons(ETH_P_IPV6)) {
		const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
		struct brnf_frag_data *data;

		if (br_validate_ipv6(net, skb))
			goto drop;

		IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size;

		nf_bridge_update_protocol(skb);

		data = this_cpu_ptr(&brnf_frag_data_storage);
		data->encap_size = nf_bridge_encap_header_len(skb);
		data->size = ETH_HLEN + data->encap_size;

		skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
						 data->size);

		if (v6ops)
			return v6ops->fragment(net, sk, skb, br_nf_push_frag_xmit);

		kfree_skb(skb);
		return -EMSGSIZE;
	}
	nf_bridge_info_free(skb);
	return br_dev_queue_push_xmit(net, sk, skb);
 drop:
	kfree_skb(skb);
	return 0;
}

/* PF_BRIDGE/POST_ROUTING ********************************************/
static unsigned int br_nf_post_routing(void *priv,
				       struct sk_buff *skb,
				       const struct nf_hook_state *state)
{
	struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
	struct net_device *realoutdev = bridge_parent(skb->dev);
	u_int8_t pf;

	/* if nf_bridge is set, but ->physoutdev is NULL, this packet came in
	 * on a bridge, but was delivered locally and is now being routed:
	 *
	 * POST_ROUTING was already invoked from the ip stack.
	 */
	if (!nf_bridge || !nf_bridge->physoutdev)
		return NF_ACCEPT;

	if (!realoutdev)
		return NF_DROP;

	if (IS_IP(skb) || IS_VLAN_IP(skb) || IS_PPPOE_IP(skb))
		pf = NFPROTO_IPV4;
	else if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb))
		pf = NFPROTO_IPV6;
	else
		return NF_ACCEPT;

	/* We assume any code from br_dev_queue_push_xmit onwards doesn't care
	 * about the value of skb->pkt_type.
	 */
	if (skb->pkt_type == PACKET_OTHERHOST) {
		skb->pkt_type = PACKET_HOST;
		nf_bridge->pkt_otherhost = true;
	}

	nf_bridge_pull_encap_header(skb);
	if (pf == NFPROTO_IPV4)
		skb->protocol = htons(ETH_P_IP);
	else
		skb->protocol = htons(ETH_P_IPV6);

	NF_HOOK(pf, NF_INET_POST_ROUTING, state->net, state->sk, skb,
		NULL, realoutdev,
		br_nf_dev_queue_xmit);

	return NF_STOLEN;
}

/* IP/SABOTAGE *****************************************************/
/* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING
 * for the second time.
 */
static unsigned int ip_sabotage_in(void *priv,
				   struct sk_buff *skb,
				   const struct nf_hook_state *state)
{
	/* already traversed PRE_ROUTING via the bridge path: skip ahead */
	if (skb->nf_bridge && !skb->nf_bridge->in_prerouting) {
		state->okfn(state->net, state->sk, skb);
		return NF_STOLEN;
	}

	return NF_ACCEPT;
}

/* This is called when br_netfilter has called into iptables/netfilter,
 * and DNAT has taken place on a bridge-forwarded packet.
 *
 * neigh->output has created a new MAC header, with local br0 MAC
 * as saddr.
 *
 * This restores the original MAC saddr of the bridged packet
 * before invoking bridge forward logic to transmit the packet.
 */
static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
{
	struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);

	skb_pull(skb, ETH_HLEN);
	nf_bridge->bridged_dnat = 0;

	BUILD_BUG_ON(sizeof(nf_bridge->neigh_header) != (ETH_HLEN - ETH_ALEN));

	/* restore the source MAC + proto saved in br_nf_pre_routing_finish_bridge() */
	skb_copy_to_linear_data_offset(skb, -(ETH_HLEN - ETH_ALEN),
				       nf_bridge->neigh_header,
				       ETH_HLEN - ETH_ALEN);
	skb->dev = nf_bridge->physindev;

	nf_bridge->physoutdev = NULL;
	br_handle_frame_finish(dev_net(skb->dev), NULL, skb);
}

/* Hook called from br_dev_xmit(); returns 1 when the skb was diverted
 * into the bridged-DNAT slow path and must not be transmitted normally.
 */
static int br_nf_dev_xmit(struct sk_buff *skb)
{
	if (skb->nf_bridge && skb->nf_bridge->bridged_dnat) {
		br_nf_pre_routing_finish_bridge_slow(skb);
		return 1;
	}
	return 0;
}

static const struct nf_br_ops br_ops = {
	.br_dev_xmit_hook =	br_nf_dev_xmit,
};

/* Intentionally empty: loading this module is what enables bridge
 * netfilter; callers just need the symbol dependency.
 */
void br_netfilter_enable(void)
{
}
EXPORT_SYMBOL_GPL(br_netfilter_enable);

/* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
 * br_dev_queue_push_xmit is called afterwards */
static const struct nf_hook_ops br_nf_ops[] = {
	{
		.hook = br_nf_pre_routing,
		.pf = NFPROTO_BRIDGE,
		.hooknum = NF_BR_PRE_ROUTING,
		.priority = NF_BR_PRI_BRNF,
	},
	{
		/* must run before br_nf_forward_arp, see br_nf_forward_finish() */
		.hook = br_nf_forward_ip,
		.pf = NFPROTO_BRIDGE,
		.hooknum = NF_BR_FORWARD,
		.priority = NF_BR_PRI_BRNF - 1,
	},
	{
		.hook = br_nf_forward_arp,
		.pf = NFPROTO_BRIDGE,
		.hooknum = NF_BR_FORWARD,
		.priority = NF_BR_PRI_BRNF,
	},
	{
		.hook = br_nf_post_routing,
		.pf = NFPROTO_BRIDGE,
		.hooknum = NF_BR_POST_ROUTING,
		.priority = NF_BR_PRI_LAST,
	},
	{
		.hook = ip_sabotage_in,
		.pf = NFPROTO_IPV4,
		.hooknum = NF_INET_PRE_ROUTING,
		.priority = NF_IP_PRI_FIRST,
	},
	{
		.hook = ip_sabotage_in,
		.pf = NFPROTO_IPV6,
		.hooknum = NF_INET_PRE_ROUTING,
		.priority = NF_IP6_PRI_FIRST,
	},
};

/* Netdevice notifier: lazily register the bridge netfilter hooks in a
 * netns the first time a bridge device is created there.
 */
static int brnf_device_event(struct notifier_block *unused, unsigned long event,
			     void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct brnf_net *brnet;
	struct net *net;
	int ret;

	if (event != NETDEV_REGISTER || !(dev->priv_flags & IFF_EBRIDGE))
		return NOTIFY_DONE;

	ASSERT_RTNL();

	net = dev_net(dev);
	brnet = net_generic(net, brnf_net_id);
	if (brnet->enabled)
		return NOTIFY_OK;

	ret = nf_register_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops));
	if (ret)
		return NOTIFY_BAD;

	brnet->enabled = true;
	return NOTIFY_OK;
}

/* Undo the lazy hook registration when the netns goes away. */
static void __net_exit brnf_exit_net(struct net *net)
{
	struct brnf_net *brnet = net_generic(net, brnf_net_id);

	if (!brnet->enabled)
		return;

	nf_unregister_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops));
	brnet->enabled = false;
}

static struct pernet_operations brnf_net_ops __read_mostly = {
	.exit = brnf_exit_net,
	.id   = &brnf_net_id,
	.size = sizeof(struct brnf_net),
};

static struct notifier_block brnf_notifier __read_mostly = {
	.notifier_call = brnf_device_event,
};

/* recursively invokes nf_hook_slow (again), skipping already-called
 * hooks (< NF_BR_PRI_BRNF).
 *
 * Called with rcu read lock held.
 */
int br_nf_hook_thresh(unsigned int hook, struct net *net,
		      struct sock *sk, struct sk_buff *skb,
		      struct net_device *indev,
		      struct net_device *outdev,
		      int (*okfn)(struct net *, struct sock *,
				  struct sk_buff *))
{
	const struct nf_hook_entries *e;
	struct nf_hook_state state;
	struct nf_hook_ops **ops;
	unsigned int i;
	int ret;

	e = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]);
	if (!e)
		return okfn(net, sk, skb);

	/* skip every hook up to and including our own priority */
	ops = nf_hook_entries_get_hook_ops(e);
	for (i = 0; i < e->num_hook_entries &&
	      ops[i]->priority <= NF_BR_PRI_BRNF; i++)
		;

	nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev,
			   sk, net, okfn);

	ret = nf_hook_slow(skb, &state, e, i);
	if (ret == 1)
		ret = okfn(net, sk, skb);

	return ret;
}

#ifdef CONFIG_SYSCTL
/* proc handler for the brnf_* switches: any nonzero write is
 * normalized to 1.
 */
static
int brnf_sysctl_call_tables(struct ctl_table *ctl, int write,
			    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int ret;

	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

	if (write && *(int *)(ctl->data))
		*(int *)(ctl->data) = 1;
	return ret;
}

static struct ctl_table brnf_table[] = {
	{
		.procname	= "bridge-nf-call-arptables",
		.data		= &brnf_call_arptables,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= brnf_sysctl_call_tables,
	},
	{
		.procname	= "bridge-nf-call-iptables",
		.data		= &brnf_call_iptables,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= brnf_sysctl_call_tables,
	},
	{
		.procname	= "bridge-nf-call-ip6tables",
		.data		= &brnf_call_ip6tables,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= brnf_sysctl_call_tables,
	},
	{
		.procname	= "bridge-nf-filter-vlan-tagged",
		.data		= &brnf_filter_vlan_tagged,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= brnf_sysctl_call_tables,
	},
	{
		.procname	= "bridge-nf-filter-pppoe-tagged",
		.data		= &brnf_filter_pppoe_tagged,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= brnf_sysctl_call_tables,
	},
	{
		.procname	= "bridge-nf-pass-vlan-input-dev",
		.data		= &brnf_pass_vlan_indev,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= brnf_sysctl_call_tables,
	},
	{ }
};
#endif

static int __init br_netfilter_init(void)
{
	int ret;

	ret = register_pernet_subsys(&brnf_net_ops);
	if (ret < 0)
		return ret;

	ret = register_netdevice_notifier(&brnf_notifier);
	if (ret < 0) {
		unregister_pernet_subsys(&brnf_net_ops);
		return ret;
	}

#ifdef CONFIG_SYSCTL
	brnf_sysctl_header = register_net_sysctl(&init_net, "net/bridge", brnf_table);
	if (brnf_sysctl_header == NULL) {
		printk(KERN_WARNING
		       "br_netfilter: can't register to sysctl.\n");
		unregister_netdevice_notifier(&brnf_notifier);
		unregister_pernet_subsys(&brnf_net_ops);
		return -ENOMEM;
	}
#endif
	/* publish the xmit hook; bridge code picks it up via RCU */
	RCU_INIT_POINTER(nf_br_ops, &br_ops);
	printk(KERN_NOTICE "Bridge firewalling registered\n");
	return 0;
}

static void __exit br_netfilter_fini(void)
{
	RCU_INIT_POINTER(nf_br_ops, NULL);
	unregister_netdevice_notifier(&brnf_notifier);
	unregister_pernet_subsys(&brnf_net_ops);
#ifdef CONFIG_SYSCTL
	unregister_net_sysctl_table(brnf_sysctl_header);
#endif
}

module_init(br_netfilter_init);
module_exit(br_netfilter_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Lennert Buytenhek <buytenh@gnu.org>");
MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
MODULE_DESCRIPTION("Linux ethernet netfilter firewall bridge");