1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #include <linux/types.h> 3 #include <linux/ip.h> 4 #include <linux/netfilter.h> 5 #include <linux/netfilter_ipv6.h> 6 #include <linux/netfilter_bridge.h> 7 #include <linux/module.h> 8 #include <linux/skbuff.h> 9 #include <linux/icmp.h> 10 #include <linux/sysctl.h> 11 #include <net/route.h> 12 #include <net/ip.h> 13 14 #include <net/netfilter/nf_conntrack.h> 15 #include <net/netfilter/nf_conntrack_core.h> 16 #include <net/netfilter/nf_conntrack_helper.h> 17 #include <net/netfilter/nf_conntrack_bridge.h> 18 19 #include <linux/netfilter/nf_tables.h> 20 #include <net/netfilter/nf_tables.h> 21 22 #include "../br_private.h" 23 24 /* Best effort variant of ip_do_fragment which preserves geometry, unless skbuff 25 * has been linearized or cloned. 26 */ 27 static int nf_br_ip_fragment(struct net *net, struct sock *sk, 28 struct sk_buff *skb, 29 struct nf_bridge_frag_data *data, 30 int (*output)(struct net *, struct sock *sk, 31 const struct nf_bridge_frag_data *data, 32 struct sk_buff *)) 33 { 34 int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; 35 unsigned int hlen, ll_rs, mtu; 36 ktime_t tstamp = skb->tstamp; 37 struct ip_frag_state state; 38 struct iphdr *iph; 39 int err; 40 41 /* for offloaded checksums cleanup checksum before fragmentation */ 42 if (skb->ip_summed == CHECKSUM_PARTIAL && 43 (err = skb_checksum_help(skb))) 44 goto blackhole; 45 46 iph = ip_hdr(skb); 47 48 /* 49 * Setup starting values 50 */ 51 52 hlen = iph->ihl * 4; 53 frag_max_size -= hlen; 54 ll_rs = LL_RESERVED_SPACE(skb->dev); 55 mtu = skb->dev->mtu; 56 57 if (skb_has_frag_list(skb)) { 58 unsigned int first_len = skb_pagelen(skb); 59 struct ip_fraglist_iter iter; 60 struct sk_buff *frag; 61 62 if (first_len - hlen > mtu || 63 skb_headroom(skb) < ll_rs) 64 goto blackhole; 65 66 if (skb_cloned(skb)) 67 goto slow_path; 68 69 skb_walk_frags(skb, frag) { 70 if (frag->len > mtu || 71 skb_headroom(frag) < hlen + ll_rs) 72 goto blackhole; 73 74 if (skb_shared(frag)) 75 goto slow_path; 76 } 77 78 ip_fraglist_init(skb, iph, hlen, &iter); 79 80 for (;;) { 81 if (iter.frag) 82 ip_fraglist_prepare(skb, &iter); 83 84 skb->tstamp = tstamp; 85 err = output(net, sk, data, skb); 86 if (err || !iter.frag) 87 break; 88 89 skb = ip_fraglist_next(&iter); 90 } 91 92 if (!err) 93 return 0; 94 95 kfree_skb_list(iter.frag); 96 97 return err; 98 } 99 slow_path: 100 /* This is a linearized skbuff, the original geometry is lost for us. 101 * This may also be a clone skbuff, we could preserve the geometry for 102 * the copies but probably not worth the effort. 103 */ 104 ip_frag_init(skb, hlen, ll_rs, frag_max_size, false, &state); 105 106 while (state.left > 0) { 107 struct sk_buff *skb2; 108 109 skb2 = ip_frag_next(skb, &state); 110 if (IS_ERR(skb2)) { 111 err = PTR_ERR(skb2); 112 goto blackhole; 113 } 114 115 skb2->tstamp = tstamp; 116 err = output(net, sk, data, skb2); 117 if (err) 118 goto blackhole; 119 } 120 consume_skb(skb); 121 return err; 122 123 blackhole: 124 kfree_skb(skb); 125 return 0; 126 } 127 128 /* ip_defrag() expects IPCB() in place. */ 129 static void br_skb_cb_save(struct sk_buff *skb, struct br_input_skb_cb *cb, 130 size_t inet_skb_parm_size) 131 { 132 memcpy(cb, skb->cb, sizeof(*cb)); 133 memset(skb->cb, 0, inet_skb_parm_size); 134 } 135 136 static void br_skb_cb_restore(struct sk_buff *skb, 137 const struct br_input_skb_cb *cb, 138 u16 fragsz) 139 { 140 memcpy(skb->cb, cb, sizeof(*cb)); 141 BR_INPUT_SKB_CB(skb)->frag_max_size = fragsz; 142 } 143 144 static unsigned int nf_ct_br_defrag4(struct sk_buff *skb, 145 const struct nf_hook_state *state) 146 { 147 u16 zone_id = NF_CT_DEFAULT_ZONE_ID; 148 enum ip_conntrack_info ctinfo; 149 struct br_input_skb_cb cb; 150 const struct nf_conn *ct; 151 int err; 152 153 if (!ip_is_fragment(ip_hdr(skb))) 154 return NF_ACCEPT; 155 156 ct = nf_ct_get(skb, &ctinfo); 157 if (ct) 158 zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo)); 159 160 br_skb_cb_save(skb, &cb, sizeof(struct inet_skb_parm)); 161 local_bh_disable(); 162 err = ip_defrag(state->net, skb, 163 IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id); 164 local_bh_enable(); 165 if (!err) { 166 br_skb_cb_restore(skb, &cb, IPCB(skb)->frag_max_size); 167 skb->ignore_df = 1; 168 return NF_ACCEPT; 169 } 170 171 return NF_STOLEN; 172 } 173 174 static unsigned int nf_ct_br_defrag6(struct sk_buff *skb, 175 const struct nf_hook_state *state) 176 { 177 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) 178 u16 zone_id = NF_CT_DEFAULT_ZONE_ID; 179 enum ip_conntrack_info ctinfo; 180 struct br_input_skb_cb cb; 181 const struct nf_conn *ct; 182 int err; 183 184 ct = nf_ct_get(skb, &ctinfo); 185 if (ct) 186 zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo)); 187 188 br_skb_cb_save(skb, &cb, sizeof(struct inet6_skb_parm)); 189 190 err = nf_ct_frag6_gather(state->net, skb, 191 IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id); 192 /* queued */ 193 if (err == -EINPROGRESS) 194 return NF_STOLEN; 195 196 br_skb_cb_restore(skb, &cb, IP6CB(skb)->frag_max_size); 197 return err == 0 ? NF_ACCEPT : NF_DROP; 198 #else 199 return NF_ACCEPT; 200 #endif 201 } 202 203 static int nf_ct_br_ip_check(const struct sk_buff *skb) 204 { 205 const struct iphdr *iph; 206 int nhoff, len; 207 208 nhoff = skb_network_offset(skb); 209 iph = ip_hdr(skb); 210 if (iph->ihl < 5 || 211 iph->version != 4) 212 return -1; 213 214 len = ntohs(iph->tot_len); 215 if (skb->len < nhoff + len || 216 len < (iph->ihl * 4)) 217 return -1; 218 219 return 0; 220 } 221 222 static int nf_ct_br_ipv6_check(const struct sk_buff *skb) 223 { 224 const struct ipv6hdr *hdr; 225 int nhoff, len; 226 227 nhoff = skb_network_offset(skb); 228 hdr = ipv6_hdr(skb); 229 if (hdr->version != 6) 230 return -1; 231 232 len = ntohs(hdr->payload_len) + sizeof(struct ipv6hdr) + nhoff; 233 if (skb->len < len) 234 return -1; 235 236 return 0; 237 } 238 239 static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb, 240 const struct nf_hook_state *state) 241 { 242 struct nf_hook_state bridge_state = *state; 243 enum ip_conntrack_info ctinfo; 244 struct nf_conn *ct; 245 u32 len; 246 int ret; 247 248 ct = nf_ct_get(skb, &ctinfo); 249 if ((ct && !nf_ct_is_template(ct)) || 250 ctinfo == IP_CT_UNTRACKED) 251 return NF_ACCEPT; 252 253 switch (skb->protocol) { 254 case htons(ETH_P_IP): 255 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 256 return NF_ACCEPT; 257 258 len = ntohs(ip_hdr(skb)->tot_len); 259 if (pskb_trim_rcsum(skb, len)) 260 return NF_ACCEPT; 261 262 if (nf_ct_br_ip_check(skb)) 263 return NF_ACCEPT; 264 265 bridge_state.pf = NFPROTO_IPV4; 266 ret = nf_ct_br_defrag4(skb, &bridge_state); 267 break; 268 case htons(ETH_P_IPV6): 269 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 270 return NF_ACCEPT; 271 272 len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len); 273 if (pskb_trim_rcsum(skb, len)) 274 return NF_ACCEPT; 275 276 if (nf_ct_br_ipv6_check(skb)) 277 return NF_ACCEPT; 278 279 bridge_state.pf = NFPROTO_IPV6; 280 ret = nf_ct_br_defrag6(skb, &bridge_state); 281 break; 282 default: 283 nf_ct_set(skb, NULL, IP_CT_UNTRACKED); 284 return NF_ACCEPT; 285 } 286 287 if (ret != NF_ACCEPT) 288 return ret; 289 290 return nf_conntrack_in(skb, &bridge_state); 291 } 292 293 static void nf_ct_bridge_frag_save(struct sk_buff *skb, 294 struct nf_bridge_frag_data *data) 295 { 296 if (skb_vlan_tag_present(skb)) { 297 data->vlan_present = true; 298 data->vlan_tci = skb->vlan_tci; 299 data->vlan_proto = skb->vlan_proto; 300 } else { 301 data->vlan_present = false; 302 } 303 skb_copy_from_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN); 304 } 305 306 static unsigned int 307 nf_ct_bridge_refrag(struct sk_buff *skb, const struct nf_hook_state *state, 308 int (*output)(struct net *, struct sock *sk, 309 const struct nf_bridge_frag_data *data, 310 struct sk_buff *)) 311 { 312 struct nf_bridge_frag_data data; 313 314 if (!BR_INPUT_SKB_CB(skb)->frag_max_size) 315 return NF_ACCEPT; 316 317 nf_ct_bridge_frag_save(skb, &data); 318 switch (skb->protocol) { 319 case htons(ETH_P_IP): 320 nf_br_ip_fragment(state->net, state->sk, skb, &data, output); 321 break; 322 case htons(ETH_P_IPV6): 323 nf_br_ip6_fragment(state->net, state->sk, skb, &data, output); 324 break; 325 default: 326 WARN_ON_ONCE(1); 327 return NF_DROP; 328 } 329 330 return NF_STOLEN; 331 } 332 333 /* Actually only slow path refragmentation needs this. */ 334 static int nf_ct_bridge_frag_restore(struct sk_buff *skb, 335 const struct nf_bridge_frag_data *data) 336 { 337 int err; 338 339 err = skb_cow_head(skb, ETH_HLEN); 340 if (err) { 341 kfree_skb(skb); 342 return -ENOMEM; 343 } 344 if (data->vlan_present) 345 __vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci); 346 else if (skb_vlan_tag_present(skb)) 347 __vlan_hwaccel_clear_tag(skb); 348 349 skb_copy_to_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN); 350 skb_reset_mac_header(skb); 351 352 return 0; 353 } 354 355 static int nf_ct_bridge_refrag_post(struct net *net, struct sock *sk, 356 const struct nf_bridge_frag_data *data, 357 struct sk_buff *skb) 358 { 359 int err; 360 361 err = nf_ct_bridge_frag_restore(skb, data); 362 if (err < 0) 363 return err; 364 365 return br_dev_queue_push_xmit(net, sk, skb); 366 } 367 368 static unsigned int nf_ct_bridge_confirm(struct sk_buff *skb) 369 { 370 enum ip_conntrack_info ctinfo; 371 struct nf_conn *ct; 372 int protoff; 373 374 ct = nf_ct_get(skb, &ctinfo); 375 if (!ct || ctinfo == IP_CT_RELATED_REPLY) 376 return nf_conntrack_confirm(skb); 377 378 switch (skb->protocol) { 379 case htons(ETH_P_IP): 380 protoff = skb_network_offset(skb) + ip_hdrlen(skb); 381 break; 382 case htons(ETH_P_IPV6): { 383 unsigned char pnum = ipv6_hdr(skb)->nexthdr; 384 __be16 frag_off; 385 386 protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, 387 &frag_off); 388 if (protoff < 0 || (frag_off & htons(~0x7)) != 0) 389 return nf_conntrack_confirm(skb); 390 } 391 break; 392 default: 393 return NF_ACCEPT; 394 } 395 return nf_confirm(skb, protoff, ct, ctinfo); 396 } 397 398 static unsigned int nf_ct_bridge_post(void *priv, struct sk_buff *skb, 399 const struct nf_hook_state *state) 400 { 401 int ret; 402 403 ret = nf_ct_bridge_confirm(skb); 404 if (ret != NF_ACCEPT) 405 return ret; 406 407 return nf_ct_bridge_refrag(skb, state, nf_ct_bridge_refrag_post); 408 } 409 410 static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = { 411 { 412 .hook = nf_ct_bridge_pre, 413 .pf = NFPROTO_BRIDGE, 414 .hooknum = NF_BR_PRE_ROUTING, 415 .priority = NF_IP_PRI_CONNTRACK, 416 }, 417 { 418 .hook = nf_ct_bridge_post, 419 .pf = NFPROTO_BRIDGE, 420 .hooknum = NF_BR_POST_ROUTING, 421 .priority = NF_IP_PRI_CONNTRACK_CONFIRM, 422 }, 423 }; 424 425 static struct nf_ct_bridge_info bridge_info = { 426 .ops = nf_ct_bridge_hook_ops, 427 .ops_size = ARRAY_SIZE(nf_ct_bridge_hook_ops), 428 .me = THIS_MODULE, 429 }; 430 431 static int __init nf_conntrack_l3proto_bridge_init(void) 432 { 433 nf_ct_bridge_register(&bridge_info); 434 435 return 0; 436 } 437 438 static void __exit nf_conntrack_l3proto_bridge_fini(void) 439 { 440 nf_ct_bridge_unregister(&bridge_info); 441 } 442 443 module_init(nf_conntrack_l3proto_bridge_init); 444 module_exit(nf_conntrack_l3proto_bridge_fini); 445 446 MODULE_ALIAS("nf_conntrack-" __stringify(AF_BRIDGE)); 447 MODULE_LICENSE("GPL"); 448