1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #include <linux/types.h> 3 #include <linux/ip.h> 4 #include <linux/netfilter.h> 5 #include <linux/netfilter_ipv6.h> 6 #include <linux/netfilter_bridge.h> 7 #include <linux/module.h> 8 #include <linux/skbuff.h> 9 #include <linux/icmp.h> 10 #include <linux/sysctl.h> 11 #include <net/route.h> 12 #include <net/ip.h> 13 14 #include <net/netfilter/nf_conntrack.h> 15 #include <net/netfilter/nf_conntrack_core.h> 16 #include <net/netfilter/nf_conntrack_helper.h> 17 #include <net/netfilter/nf_conntrack_bridge.h> 18 19 #include <linux/netfilter/nf_tables.h> 20 #include <net/netfilter/nf_tables.h> 21 22 #include "../br_private.h" 23 24 /* Best effort variant of ip_do_fragment which preserves geometry, unless skbuff 25 * has been linearized or cloned. 26 */ 27 static int nf_br_ip_fragment(struct net *net, struct sock *sk, 28 struct sk_buff *skb, 29 struct nf_bridge_frag_data *data, 30 int (*output)(struct net *, struct sock *sk, 31 const struct nf_bridge_frag_data *data, 32 struct sk_buff *)) 33 { 34 int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; 35 unsigned int hlen, ll_rs, mtu; 36 ktime_t tstamp = skb->tstamp; 37 struct ip_frag_state state; 38 struct iphdr *iph; 39 int err; 40 41 /* for offloaded checksums cleanup checksum before fragmentation */ 42 if (skb->ip_summed == CHECKSUM_PARTIAL && 43 (err = skb_checksum_help(skb))) 44 goto blackhole; 45 46 iph = ip_hdr(skb); 47 48 /* 49 * Setup starting values 50 */ 51 52 hlen = iph->ihl * 4; 53 frag_max_size -= hlen; 54 ll_rs = LL_RESERVED_SPACE(skb->dev); 55 mtu = skb->dev->mtu; 56 57 if (skb_has_frag_list(skb)) { 58 unsigned int first_len = skb_pagelen(skb); 59 struct ip_fraglist_iter iter; 60 struct sk_buff *frag; 61 62 if (first_len - hlen > mtu || 63 skb_headroom(skb) < ll_rs) 64 goto blackhole; 65 66 if (skb_cloned(skb)) 67 goto slow_path; 68 69 skb_walk_frags(skb, frag) { 70 if (frag->len > mtu || 71 skb_headroom(frag) < hlen + ll_rs) 72 goto blackhole; 73 74 if (skb_shared(frag)) 75 goto slow_path; 76 } 77 78 ip_fraglist_init(skb, iph, hlen, &iter); 79 80 for (;;) { 81 if (iter.frag) 82 ip_fraglist_prepare(skb, &iter); 83 84 skb->tstamp = tstamp; 85 err = output(net, sk, data, skb); 86 if (err || !iter.frag) 87 break; 88 89 skb = ip_fraglist_next(&iter); 90 } 91 return err; 92 } 93 slow_path: 94 /* This is a linearized skbuff, the original geometry is lost for us. 95 * This may also be a clone skbuff, we could preserve the geometry for 96 * the copies but probably not worth the effort. 97 */ 98 ip_frag_init(skb, hlen, ll_rs, frag_max_size, false, &state); 99 100 while (state.left > 0) { 101 struct sk_buff *skb2; 102 103 skb2 = ip_frag_next(skb, &state); 104 if (IS_ERR(skb2)) { 105 err = PTR_ERR(skb2); 106 goto blackhole; 107 } 108 109 skb2->tstamp = tstamp; 110 err = output(net, sk, data, skb2); 111 if (err) 112 goto blackhole; 113 } 114 consume_skb(skb); 115 return err; 116 117 blackhole: 118 kfree_skb(skb); 119 return 0; 120 } 121 122 /* ip_defrag() expects IPCB() in place. */ 123 static void br_skb_cb_save(struct sk_buff *skb, struct br_input_skb_cb *cb, 124 size_t inet_skb_parm_size) 125 { 126 memcpy(cb, skb->cb, sizeof(*cb)); 127 memset(skb->cb, 0, inet_skb_parm_size); 128 } 129 130 static void br_skb_cb_restore(struct sk_buff *skb, 131 const struct br_input_skb_cb *cb, 132 u16 fragsz) 133 { 134 memcpy(skb->cb, cb, sizeof(*cb)); 135 BR_INPUT_SKB_CB(skb)->frag_max_size = fragsz; 136 } 137 138 static unsigned int nf_ct_br_defrag4(struct sk_buff *skb, 139 const struct nf_hook_state *state) 140 { 141 u16 zone_id = NF_CT_DEFAULT_ZONE_ID; 142 enum ip_conntrack_info ctinfo; 143 struct br_input_skb_cb cb; 144 const struct nf_conn *ct; 145 int err; 146 147 if (!ip_is_fragment(ip_hdr(skb))) 148 return NF_ACCEPT; 149 150 ct = nf_ct_get(skb, &ctinfo); 151 if (ct) 152 zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo)); 153 154 br_skb_cb_save(skb, &cb, sizeof(struct inet_skb_parm)); 155 local_bh_disable(); 156 err = ip_defrag(state->net, skb, 157 IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id); 158 local_bh_enable(); 159 if (!err) { 160 br_skb_cb_restore(skb, &cb, IPCB(skb)->frag_max_size); 161 skb->ignore_df = 1; 162 return NF_ACCEPT; 163 } 164 165 return NF_STOLEN; 166 } 167 168 static unsigned int nf_ct_br_defrag6(struct sk_buff *skb, 169 const struct nf_hook_state *state) 170 { 171 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) 172 u16 zone_id = NF_CT_DEFAULT_ZONE_ID; 173 enum ip_conntrack_info ctinfo; 174 struct br_input_skb_cb cb; 175 const struct nf_conn *ct; 176 int err; 177 178 ct = nf_ct_get(skb, &ctinfo); 179 if (ct) 180 zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo)); 181 182 br_skb_cb_save(skb, &cb, sizeof(struct inet6_skb_parm)); 183 184 err = nf_ct_frag6_gather(state->net, skb, 185 IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id); 186 /* queued */ 187 if (err == -EINPROGRESS) 188 return NF_STOLEN; 189 190 br_skb_cb_restore(skb, &cb, IP6CB(skb)->frag_max_size); 191 return err == 0 ? NF_ACCEPT : NF_DROP; 192 #else 193 return NF_ACCEPT; 194 #endif 195 } 196 197 static int nf_ct_br_ip_check(const struct sk_buff *skb) 198 { 199 const struct iphdr *iph; 200 int nhoff, len; 201 202 nhoff = skb_network_offset(skb); 203 iph = ip_hdr(skb); 204 if (iph->ihl < 5 || 205 iph->version != 4) 206 return -1; 207 208 len = ntohs(iph->tot_len); 209 if (skb->len < nhoff + len || 210 len < (iph->ihl * 4)) 211 return -1; 212 213 return 0; 214 } 215 216 static int nf_ct_br_ipv6_check(const struct sk_buff *skb) 217 { 218 const struct ipv6hdr *hdr; 219 int nhoff, len; 220 221 nhoff = skb_network_offset(skb); 222 hdr = ipv6_hdr(skb); 223 if (hdr->version != 6) 224 return -1; 225 226 len = ntohs(hdr->payload_len) + sizeof(struct ipv6hdr) + nhoff; 227 if (skb->len < len) 228 return -1; 229 230 return 0; 231 } 232 233 static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb, 234 const struct nf_hook_state *state) 235 { 236 struct nf_hook_state bridge_state = *state; 237 enum ip_conntrack_info ctinfo; 238 struct nf_conn *ct; 239 u32 len; 240 int ret; 241 242 ct = nf_ct_get(skb, &ctinfo); 243 if ((ct && !nf_ct_is_template(ct)) || 244 ctinfo == IP_CT_UNTRACKED) 245 return NF_ACCEPT; 246 247 switch (skb->protocol) { 248 case htons(ETH_P_IP): 249 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 250 return NF_ACCEPT; 251 252 len = ntohs(ip_hdr(skb)->tot_len); 253 if (pskb_trim_rcsum(skb, len)) 254 return NF_ACCEPT; 255 256 if (nf_ct_br_ip_check(skb)) 257 return NF_ACCEPT; 258 259 bridge_state.pf = NFPROTO_IPV4; 260 ret = nf_ct_br_defrag4(skb, &bridge_state); 261 break; 262 case htons(ETH_P_IPV6): 263 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 264 return NF_ACCEPT; 265 266 len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len); 267 if (pskb_trim_rcsum(skb, len)) 268 return NF_ACCEPT; 269 270 if (nf_ct_br_ipv6_check(skb)) 271 return NF_ACCEPT; 272 273 bridge_state.pf = NFPROTO_IPV6; 274 ret = nf_ct_br_defrag6(skb, &bridge_state); 275 break; 276 default: 277 nf_ct_set(skb, NULL, IP_CT_UNTRACKED); 278 return NF_ACCEPT; 279 } 280 281 if (ret != NF_ACCEPT) 282 return ret; 283 284 return nf_conntrack_in(skb, &bridge_state); 285 } 286 287 static void nf_ct_bridge_frag_save(struct sk_buff *skb, 288 struct nf_bridge_frag_data *data) 289 { 290 if (skb_vlan_tag_present(skb)) { 291 data->vlan_present = true; 292 data->vlan_tci = skb->vlan_tci; 293 data->vlan_proto = skb->vlan_proto; 294 } else { 295 data->vlan_present = false; 296 } 297 skb_copy_from_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN); 298 } 299 300 static unsigned int 301 nf_ct_bridge_refrag(struct sk_buff *skb, const struct nf_hook_state *state, 302 int (*output)(struct net *, struct sock *sk, 303 const struct nf_bridge_frag_data *data, 304 struct sk_buff *)) 305 { 306 struct nf_bridge_frag_data data; 307 308 if (!BR_INPUT_SKB_CB(skb)->frag_max_size) 309 return NF_ACCEPT; 310 311 nf_ct_bridge_frag_save(skb, &data); 312 switch (skb->protocol) { 313 case htons(ETH_P_IP): 314 nf_br_ip_fragment(state->net, state->sk, skb, &data, output); 315 break; 316 case htons(ETH_P_IPV6): 317 nf_br_ip6_fragment(state->net, state->sk, skb, &data, output); 318 break; 319 default: 320 WARN_ON_ONCE(1); 321 return NF_DROP; 322 } 323 324 return NF_STOLEN; 325 } 326 327 /* Actually only slow path refragmentation needs this. */ 328 static int nf_ct_bridge_frag_restore(struct sk_buff *skb, 329 const struct nf_bridge_frag_data *data) 330 { 331 int err; 332 333 err = skb_cow_head(skb, ETH_HLEN); 334 if (err) { 335 kfree_skb(skb); 336 return -ENOMEM; 337 } 338 if (data->vlan_present) 339 __vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci); 340 else if (skb_vlan_tag_present(skb)) 341 __vlan_hwaccel_clear_tag(skb); 342 343 skb_copy_to_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN); 344 skb_reset_mac_header(skb); 345 346 return 0; 347 } 348 349 static int nf_ct_bridge_refrag_post(struct net *net, struct sock *sk, 350 const struct nf_bridge_frag_data *data, 351 struct sk_buff *skb) 352 { 353 int err; 354 355 err = nf_ct_bridge_frag_restore(skb, data); 356 if (err < 0) 357 return err; 358 359 return br_dev_queue_push_xmit(net, sk, skb); 360 } 361 362 static unsigned int nf_ct_bridge_confirm(struct sk_buff *skb) 363 { 364 enum ip_conntrack_info ctinfo; 365 struct nf_conn *ct; 366 int protoff; 367 368 ct = nf_ct_get(skb, &ctinfo); 369 if (!ct || ctinfo == IP_CT_RELATED_REPLY) 370 return nf_conntrack_confirm(skb); 371 372 switch (skb->protocol) { 373 case htons(ETH_P_IP): 374 protoff = skb_network_offset(skb) + ip_hdrlen(skb); 375 break; 376 case htons(ETH_P_IPV6): { 377 unsigned char pnum = ipv6_hdr(skb)->nexthdr; 378 __be16 frag_off; 379 380 protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, 381 &frag_off); 382 if (protoff < 0 || (frag_off & htons(~0x7)) != 0) 383 return nf_conntrack_confirm(skb); 384 } 385 break; 386 default: 387 return NF_ACCEPT; 388 } 389 return nf_confirm(skb, protoff, ct, ctinfo); 390 } 391 392 static unsigned int nf_ct_bridge_post(void *priv, struct sk_buff *skb, 393 const struct nf_hook_state *state) 394 { 395 int ret; 396 397 ret = nf_ct_bridge_confirm(skb); 398 if (ret != NF_ACCEPT) 399 return ret; 400 401 return nf_ct_bridge_refrag(skb, state, nf_ct_bridge_refrag_post); 402 } 403 404 static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = { 405 { 406 .hook = nf_ct_bridge_pre, 407 .pf = NFPROTO_BRIDGE, 408 .hooknum = NF_BR_PRE_ROUTING, 409 .priority = NF_IP_PRI_CONNTRACK, 410 }, 411 { 412 .hook = nf_ct_bridge_post, 413 .pf = NFPROTO_BRIDGE, 414 .hooknum = NF_BR_POST_ROUTING, 415 .priority = NF_IP_PRI_CONNTRACK_CONFIRM, 416 }, 417 }; 418 419 static struct nf_ct_bridge_info bridge_info = { 420 .ops = nf_ct_bridge_hook_ops, 421 .ops_size = ARRAY_SIZE(nf_ct_bridge_hook_ops), 422 .me = THIS_MODULE, 423 }; 424 425 static int __init nf_conntrack_l3proto_bridge_init(void) 426 { 427 nf_ct_bridge_register(&bridge_info); 428 429 return 0; 430 } 431 432 static void __exit nf_conntrack_l3proto_bridge_fini(void) 433 { 434 nf_ct_bridge_unregister(&bridge_info); 435 } 436 437 module_init(nf_conntrack_l3proto_bridge_init); 438 module_exit(nf_conntrack_l3proto_bridge_fini); 439 440 MODULE_ALIAS("nf_conntrack-" __stringify(AF_BRIDGE)); 441 MODULE_LICENSE("GPL"); 442