1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #include <linux/types.h> 3 #include <linux/ip.h> 4 #include <linux/netfilter.h> 5 #include <linux/netfilter_ipv6.h> 6 #include <linux/netfilter_bridge.h> 7 #include <linux/module.h> 8 #include <linux/skbuff.h> 9 #include <linux/icmp.h> 10 #include <linux/sysctl.h> 11 #include <net/route.h> 12 #include <net/ip.h> 13 14 #include <net/netfilter/nf_conntrack.h> 15 #include <net/netfilter/nf_conntrack_core.h> 16 #include <net/netfilter/nf_conntrack_helper.h> 17 #include <net/netfilter/nf_conntrack_bridge.h> 18 19 #include <linux/netfilter/nf_tables.h> 20 #include <net/netfilter/nf_tables.h> 21 22 #include "../br_private.h" 23 24 /* Best effort variant of ip_do_fragment which preserves geometry, unless skbuff 25 * has been linearized or cloned. 26 */ 27 static int nf_br_ip_fragment(struct net *net, struct sock *sk, 28 struct sk_buff *skb, 29 struct nf_bridge_frag_data *data, 30 int (*output)(struct net *, struct sock *sk, 31 const struct nf_bridge_frag_data *data, 32 struct sk_buff *)) 33 { 34 int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; 35 bool mono_delivery_time = skb->mono_delivery_time; 36 unsigned int hlen, ll_rs, mtu; 37 ktime_t tstamp = skb->tstamp; 38 struct ip_frag_state state; 39 struct iphdr *iph; 40 int err; 41 42 /* for offloaded checksums cleanup checksum before fragmentation */ 43 if (skb->ip_summed == CHECKSUM_PARTIAL && 44 (err = skb_checksum_help(skb))) 45 goto blackhole; 46 47 iph = ip_hdr(skb); 48 49 /* 50 * Setup starting values 51 */ 52 53 hlen = iph->ihl * 4; 54 frag_max_size -= hlen; 55 ll_rs = LL_RESERVED_SPACE(skb->dev); 56 mtu = skb->dev->mtu; 57 58 if (skb_has_frag_list(skb)) { 59 unsigned int first_len = skb_pagelen(skb); 60 struct ip_fraglist_iter iter; 61 struct sk_buff *frag; 62 63 if (first_len - hlen > mtu || 64 skb_headroom(skb) < ll_rs) 65 goto blackhole; 66 67 if (skb_cloned(skb)) 68 goto slow_path; 69 70 skb_walk_frags(skb, frag) { 71 if (frag->len > mtu || 72 skb_headroom(frag) < hlen + ll_rs) 73 goto blackhole; 74 75 if (skb_shared(frag)) 76 goto slow_path; 77 } 78 79 ip_fraglist_init(skb, iph, hlen, &iter); 80 81 for (;;) { 82 if (iter.frag) 83 ip_fraglist_prepare(skb, &iter); 84 85 skb_set_delivery_time(skb, tstamp, mono_delivery_time); 86 err = output(net, sk, data, skb); 87 if (err || !iter.frag) 88 break; 89 90 skb = ip_fraglist_next(&iter); 91 } 92 93 if (!err) 94 return 0; 95 96 kfree_skb_list(iter.frag); 97 98 return err; 99 } 100 slow_path: 101 /* This is a linearized skbuff, the original geometry is lost for us. 102 * This may also be a clone skbuff, we could preserve the geometry for 103 * the copies but probably not worth the effort. 104 */ 105 ip_frag_init(skb, hlen, ll_rs, frag_max_size, false, &state); 106 107 while (state.left > 0) { 108 struct sk_buff *skb2; 109 110 skb2 = ip_frag_next(skb, &state); 111 if (IS_ERR(skb2)) { 112 err = PTR_ERR(skb2); 113 goto blackhole; 114 } 115 116 skb_set_delivery_time(skb2, tstamp, mono_delivery_time); 117 err = output(net, sk, data, skb2); 118 if (err) 119 goto blackhole; 120 } 121 consume_skb(skb); 122 return err; 123 124 blackhole: 125 kfree_skb(skb); 126 return 0; 127 } 128 129 /* ip_defrag() expects IPCB() in place. */ 130 static void br_skb_cb_save(struct sk_buff *skb, struct br_input_skb_cb *cb, 131 size_t inet_skb_parm_size) 132 { 133 memcpy(cb, skb->cb, sizeof(*cb)); 134 memset(skb->cb, 0, inet_skb_parm_size); 135 } 136 137 static void br_skb_cb_restore(struct sk_buff *skb, 138 const struct br_input_skb_cb *cb, 139 u16 fragsz) 140 { 141 memcpy(skb->cb, cb, sizeof(*cb)); 142 BR_INPUT_SKB_CB(skb)->frag_max_size = fragsz; 143 } 144 145 static unsigned int nf_ct_br_defrag4(struct sk_buff *skb, 146 const struct nf_hook_state *state) 147 { 148 u16 zone_id = NF_CT_DEFAULT_ZONE_ID; 149 enum ip_conntrack_info ctinfo; 150 struct br_input_skb_cb cb; 151 const struct nf_conn *ct; 152 int err; 153 154 if (!ip_is_fragment(ip_hdr(skb))) 155 return NF_ACCEPT; 156 157 ct = nf_ct_get(skb, &ctinfo); 158 if (ct) 159 zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo)); 160 161 br_skb_cb_save(skb, &cb, sizeof(struct inet_skb_parm)); 162 local_bh_disable(); 163 err = ip_defrag(state->net, skb, 164 IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id); 165 local_bh_enable(); 166 if (!err) { 167 br_skb_cb_restore(skb, &cb, IPCB(skb)->frag_max_size); 168 skb->ignore_df = 1; 169 return NF_ACCEPT; 170 } 171 172 return NF_STOLEN; 173 } 174 175 static unsigned int nf_ct_br_defrag6(struct sk_buff *skb, 176 const struct nf_hook_state *state) 177 { 178 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) 179 u16 zone_id = NF_CT_DEFAULT_ZONE_ID; 180 enum ip_conntrack_info ctinfo; 181 struct br_input_skb_cb cb; 182 const struct nf_conn *ct; 183 int err; 184 185 ct = nf_ct_get(skb, &ctinfo); 186 if (ct) 187 zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo)); 188 189 br_skb_cb_save(skb, &cb, sizeof(struct inet6_skb_parm)); 190 191 err = nf_ct_frag6_gather(state->net, skb, 192 IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id); 193 /* queued */ 194 if (err == -EINPROGRESS) 195 return NF_STOLEN; 196 197 br_skb_cb_restore(skb, &cb, IP6CB(skb)->frag_max_size); 198 return err == 0 ? NF_ACCEPT : NF_DROP; 199 #else 200 return NF_ACCEPT; 201 #endif 202 } 203 204 static int nf_ct_br_ip_check(const struct sk_buff *skb) 205 { 206 const struct iphdr *iph; 207 int nhoff, len; 208 209 nhoff = skb_network_offset(skb); 210 iph = ip_hdr(skb); 211 if (iph->ihl < 5 || 212 iph->version != 4) 213 return -1; 214 215 len = ntohs(iph->tot_len); 216 if (skb->len < nhoff + len || 217 len < (iph->ihl * 4)) 218 return -1; 219 220 return 0; 221 } 222 223 static int nf_ct_br_ipv6_check(const struct sk_buff *skb) 224 { 225 const struct ipv6hdr *hdr; 226 int nhoff, len; 227 228 nhoff = skb_network_offset(skb); 229 hdr = ipv6_hdr(skb); 230 if (hdr->version != 6) 231 return -1; 232 233 len = ntohs(hdr->payload_len) + sizeof(struct ipv6hdr) + nhoff; 234 if (skb->len < len) 235 return -1; 236 237 return 0; 238 } 239 240 static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb, 241 const struct nf_hook_state *state) 242 { 243 struct nf_hook_state bridge_state = *state; 244 enum ip_conntrack_info ctinfo; 245 struct nf_conn *ct; 246 u32 len; 247 int ret; 248 249 ct = nf_ct_get(skb, &ctinfo); 250 if ((ct && !nf_ct_is_template(ct)) || 251 ctinfo == IP_CT_UNTRACKED) 252 return NF_ACCEPT; 253 254 switch (skb->protocol) { 255 case htons(ETH_P_IP): 256 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 257 return NF_ACCEPT; 258 259 len = ntohs(ip_hdr(skb)->tot_len); 260 if (pskb_trim_rcsum(skb, len)) 261 return NF_ACCEPT; 262 263 if (nf_ct_br_ip_check(skb)) 264 return NF_ACCEPT; 265 266 bridge_state.pf = NFPROTO_IPV4; 267 ret = nf_ct_br_defrag4(skb, &bridge_state); 268 break; 269 case htons(ETH_P_IPV6): 270 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 271 return NF_ACCEPT; 272 273 len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len); 274 if (pskb_trim_rcsum(skb, len)) 275 return NF_ACCEPT; 276 277 if (nf_ct_br_ipv6_check(skb)) 278 return NF_ACCEPT; 279 280 bridge_state.pf = NFPROTO_IPV6; 281 ret = nf_ct_br_defrag6(skb, &bridge_state); 282 break; 283 default: 284 nf_ct_set(skb, NULL, IP_CT_UNTRACKED); 285 return NF_ACCEPT; 286 } 287 288 if (ret != NF_ACCEPT) 289 return ret; 290 291 return nf_conntrack_in(skb, &bridge_state); 292 } 293 294 static void nf_ct_bridge_frag_save(struct sk_buff *skb, 295 struct nf_bridge_frag_data *data) 296 { 297 if (skb_vlan_tag_present(skb)) { 298 data->vlan_present = true; 299 data->vlan_tci = skb->vlan_tci; 300 data->vlan_proto = skb->vlan_proto; 301 } else { 302 data->vlan_present = false; 303 } 304 skb_copy_from_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN); 305 } 306 307 static unsigned int 308 nf_ct_bridge_refrag(struct sk_buff *skb, const struct nf_hook_state *state, 309 int (*output)(struct net *, struct sock *sk, 310 const struct nf_bridge_frag_data *data, 311 struct sk_buff *)) 312 { 313 struct nf_bridge_frag_data data; 314 315 if (!BR_INPUT_SKB_CB(skb)->frag_max_size) 316 return NF_ACCEPT; 317 318 nf_ct_bridge_frag_save(skb, &data); 319 switch (skb->protocol) { 320 case htons(ETH_P_IP): 321 nf_br_ip_fragment(state->net, state->sk, skb, &data, output); 322 break; 323 case htons(ETH_P_IPV6): 324 nf_br_ip6_fragment(state->net, state->sk, skb, &data, output); 325 break; 326 default: 327 WARN_ON_ONCE(1); 328 return NF_DROP; 329 } 330 331 return NF_STOLEN; 332 } 333 334 /* Actually only slow path refragmentation needs this. */ 335 static int nf_ct_bridge_frag_restore(struct sk_buff *skb, 336 const struct nf_bridge_frag_data *data) 337 { 338 int err; 339 340 err = skb_cow_head(skb, ETH_HLEN); 341 if (err) { 342 kfree_skb(skb); 343 return -ENOMEM; 344 } 345 if (data->vlan_present) 346 __vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci); 347 else if (skb_vlan_tag_present(skb)) 348 __vlan_hwaccel_clear_tag(skb); 349 350 skb_copy_to_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN); 351 skb_reset_mac_header(skb); 352 353 return 0; 354 } 355 356 static int nf_ct_bridge_refrag_post(struct net *net, struct sock *sk, 357 const struct nf_bridge_frag_data *data, 358 struct sk_buff *skb) 359 { 360 int err; 361 362 err = nf_ct_bridge_frag_restore(skb, data); 363 if (err < 0) 364 return err; 365 366 return br_dev_queue_push_xmit(net, sk, skb); 367 } 368 369 static unsigned int nf_ct_bridge_confirm(struct sk_buff *skb) 370 { 371 enum ip_conntrack_info ctinfo; 372 struct nf_conn *ct; 373 int protoff; 374 375 ct = nf_ct_get(skb, &ctinfo); 376 if (!ct || ctinfo == IP_CT_RELATED_REPLY) 377 return nf_conntrack_confirm(skb); 378 379 switch (skb->protocol) { 380 case htons(ETH_P_IP): 381 protoff = skb_network_offset(skb) + ip_hdrlen(skb); 382 break; 383 case htons(ETH_P_IPV6): { 384 unsigned char pnum = ipv6_hdr(skb)->nexthdr; 385 __be16 frag_off; 386 387 protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, 388 &frag_off); 389 if (protoff < 0 || (frag_off & htons(~0x7)) != 0) 390 return nf_conntrack_confirm(skb); 391 } 392 break; 393 default: 394 return NF_ACCEPT; 395 } 396 return nf_confirm(skb, protoff, ct, ctinfo); 397 } 398 399 static unsigned int nf_ct_bridge_post(void *priv, struct sk_buff *skb, 400 const struct nf_hook_state *state) 401 { 402 int ret; 403 404 ret = nf_ct_bridge_confirm(skb); 405 if (ret != NF_ACCEPT) 406 return ret; 407 408 return nf_ct_bridge_refrag(skb, state, nf_ct_bridge_refrag_post); 409 } 410 411 static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = { 412 { 413 .hook = nf_ct_bridge_pre, 414 .pf = NFPROTO_BRIDGE, 415 .hooknum = NF_BR_PRE_ROUTING, 416 .priority = NF_IP_PRI_CONNTRACK, 417 }, 418 { 419 .hook = nf_ct_bridge_post, 420 .pf = NFPROTO_BRIDGE, 421 .hooknum = NF_BR_POST_ROUTING, 422 .priority = NF_IP_PRI_CONNTRACK_CONFIRM, 423 }, 424 }; 425 426 static struct nf_ct_bridge_info bridge_info = { 427 .ops = nf_ct_bridge_hook_ops, 428 .ops_size = ARRAY_SIZE(nf_ct_bridge_hook_ops), 429 .me = THIS_MODULE, 430 }; 431 432 static int __init nf_conntrack_l3proto_bridge_init(void) 433 { 434 nf_ct_bridge_register(&bridge_info); 435 436 return 0; 437 } 438 439 static void __exit nf_conntrack_l3proto_bridge_fini(void) 440 { 441 nf_ct_bridge_unregister(&bridge_info); 442 } 443 444 module_init(nf_conntrack_l3proto_bridge_init); 445 module_exit(nf_conntrack_l3proto_bridge_fini); 446 447 MODULE_ALIAS("nf_conntrack-" __stringify(AF_BRIDGE)); 448 MODULE_LICENSE("GPL"); 449