// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/netdevice.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/neighbour.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack_acct.h>
/* For layer 4 checksum field offset. */
#include <linux/tcp.h>
#include <linux/udp.h>

static int nf_flow_state_check(struct flow_offload *flow, int proto,
			       struct sk_buff *skb, unsigned int thoff)
{
	struct tcphdr *tcph;

	if (proto != IPPROTO_TCP)
		return 0;

	if (!pskb_may_pull(skb, thoff + sizeof(*tcph)))
		return -1;

	tcph = (void *)(skb_network_header(skb) + thoff);
	if (unlikely(tcph->fin || tcph->rst)) {
		flow_offload_teardown(flow);
		return -1;
	}

	return 0;
}

static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
			      __be32 addr, __be32 new_addr)
{
	struct tcphdr *tcph;

	if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*tcph)))
		return -1;

	tcph = (void *)(skb_network_header(skb) + thoff);
	inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);

	return 0;
}

static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
			      __be32 addr, __be32 new_addr)
{
	struct udphdr *udph;

	if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*udph)))
		return -1;

	udph = (void *)(skb_network_header(skb) + thoff);
	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
		inet_proto_csum_replace4(&udph->check, skb, addr,
					 new_addr, true);
		if (!udph->check)
			udph->check = CSUM_MANGLED_0;
	}

	return 0;
}

static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
				  unsigned int thoff, __be32 addr,
				  __be32 new_addr)
{
	switch (iph->protocol) {
	case IPPROTO_TCP:
		if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
			return NF_DROP;
		break;
	case IPPROTO_UDP:
		if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
			return NF_DROP;
		break;
	}

	return 0;
}

static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
			   struct iphdr *iph, unsigned int thoff,
			   enum flow_offload_tuple_dir dir)
{
	__be32 addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = iph->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
		iph->saddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = iph->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
		iph->daddr = new_addr;
		break;
	default:
		return -1;
	}
	csum_replace4(&iph->check, addr, new_addr);

	return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}
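
/* Rewrite the destination address for DNAT: the original direction takes the
 * reply tuple's source address, the reply direction takes the original
 * tuple's destination address. The IP header checksum and the layer 4
 * checksum are fixed up to match the new address.
 */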
static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
			   struct iphdr *iph, unsigned int thoff,
			   enum flow_offload_tuple_dir dir)
{
	__be32 addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = iph->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
		iph->daddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = iph->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
		iph->saddr = new_addr;
		break;
	default:
		return -1;
	}
	csum_replace4(&iph->check, addr, new_addr);

	return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}

static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
			  unsigned int thoff, enum flow_offload_tuple_dir dir)
{
	struct iphdr *iph = ip_hdr(skb);

	if (test_bit(NF_FLOW_SNAT, &flow->flags) &&
	    (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
	     nf_flow_snat_ip(flow, skb, ip_hdr(skb), thoff, dir) < 0))
		return -1;

	iph = ip_hdr(skb);
	if (test_bit(NF_FLOW_DNAT, &flow->flags) &&
	    (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
	     nf_flow_dnat_ip(flow, skb, ip_hdr(skb), thoff, dir) < 0))
		return -1;

	return 0;
}

static bool ip_has_options(unsigned int thoff)
{
	return thoff != sizeof(struct iphdr);
}

static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
			    struct flow_offload_tuple *tuple)
{
	struct flow_ports *ports;
	unsigned int thoff;
	struct iphdr *iph;

	if (!pskb_may_pull(skb, sizeof(*iph)))
		return -1;

	iph = ip_hdr(skb);
	thoff = iph->ihl * 4;

	if (ip_is_fragment(iph) ||
	    unlikely(ip_has_options(thoff)))
		return -1;

	if (iph->protocol != IPPROTO_TCP &&
	    iph->protocol != IPPROTO_UDP)
		return -1;

	if (iph->ttl <= 1)
		return -1;

	thoff = iph->ihl * 4;
	if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
		return -1;

	iph = ip_hdr(skb);
	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);

	tuple->src_v4.s_addr = iph->saddr;
	tuple->dst_v4.s_addr = iph->daddr;
	tuple->src_port = ports->source;
	tuple->dst_port = ports->dest;
	tuple->l3proto = AF_INET;
	tuple->l4proto = iph->protocol;
	tuple->iifidx = dev->ifindex;

	return 0;
}

/* Based on ip_exceeds_mtu(). */
static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
		return false;

	return true;
}
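
/* Cached routes with an xfrm state attached may be invalidated at any time,
 * so revalidate them with dst_check() before use; other cached routes are
 * used as-is.
 */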
static int nf_flow_offload_dst_check(struct dst_entry *dst)
{
	if (unlikely(dst_xfrm(dst)))
		return dst_check(dst, 0) ? 0 : -1;

	return 0;
}

static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
				      const struct nf_hook_state *state,
				      struct dst_entry *dst)
{
	skb_orphan(skb);
	skb_dst_set_noref(skb, dst);
	dst_output(state->net, state->sk, skb);
	return NF_STOLEN;
}

unsigned int
nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
			const struct nf_hook_state *state)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct nf_flowtable *flow_table = priv;
	struct flow_offload_tuple tuple = {};
	enum flow_offload_tuple_dir dir;
	struct flow_offload *flow;
	struct net_device *outdev;
	struct rtable *rt;
	unsigned int thoff;
	struct iphdr *iph;
	__be32 nexthop;

	if (skb->protocol != htons(ETH_P_IP))
		return NF_ACCEPT;

	if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
		return NF_ACCEPT;

	tuplehash = flow_offload_lookup(flow_table, &tuple);
	if (tuplehash == NULL)
		return NF_ACCEPT;

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
	rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
	outdev = rt->dst.dev;

	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
		return NF_ACCEPT;

	if (skb_try_make_writable(skb, sizeof(*iph)))
		return NF_DROP;

	thoff = ip_hdr(skb)->ihl * 4;
	if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
		return NF_ACCEPT;

	flow_offload_refresh(flow_table, flow);

	if (nf_flow_offload_dst_check(&rt->dst)) {
		flow_offload_teardown(flow);
		return NF_ACCEPT;
	}

	if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
		return NF_DROP;

	iph = ip_hdr(skb);
	ip_decrease_ttl(iph);
	skb->tstamp = 0;

	if (flow_table->flags & NF_FLOWTABLE_COUNTER)
		nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);

	if (unlikely(dst_xfrm(&rt->dst))) {
		memset(skb->cb, 0, sizeof(struct inet_skb_parm));
		IPCB(skb)->iif = skb->dev->ifindex;
		IPCB(skb)->flags = IPSKB_FORWARDED;
		return nf_flow_xmit_xfrm(skb, state, &rt->dst);
	}

	skb->dev = outdev;
	nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
	skb_dst_set_noref(skb, &rt->dst);
	neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);

	return NF_STOLEN;
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);

static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
				struct in6_addr *addr,
				struct in6_addr *new_addr)
{
	struct tcphdr *tcph;

	if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*tcph)))
		return -1;

	tcph = (void *)(skb_network_header(skb) + thoff);
	inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
				  new_addr->s6_addr32, true);

	return 0;
}

static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
				struct in6_addr *addr,
				struct in6_addr *new_addr)
{
	struct udphdr *udph;

	if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*udph)))
		return -1;

	udph = (void *)(skb_network_header(skb) + thoff);
	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
		inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
					  new_addr->s6_addr32, true);
		if (!udph->check)
			udph->check = CSUM_MANGLED_0;
	}

	return 0;
}
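
/* Dispatch the IPv6 address rewrite to the matching layer 4 checksum fixup;
 * unknown protocols fall through without touching the packet.
 */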
static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
				    unsigned int thoff, struct in6_addr *addr,
				    struct in6_addr *new_addr)
{
	switch (ip6h->nexthdr) {
	case IPPROTO_TCP:
		if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
			return NF_DROP;
		break;
	case IPPROTO_UDP:
		if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
			return NF_DROP;
		break;
	}

	return 0;
}

static int nf_flow_snat_ipv6(const struct flow_offload *flow,
			     struct sk_buff *skb, struct ipv6hdr *ip6h,
			     unsigned int thoff,
			     enum flow_offload_tuple_dir dir)
{
	struct in6_addr addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = ip6h->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
		ip6h->saddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = ip6h->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
		ip6h->daddr = new_addr;
		break;
	default:
		return -1;
	}

	return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
}

static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
			     struct sk_buff *skb, struct ipv6hdr *ip6h,
			     unsigned int thoff,
			     enum flow_offload_tuple_dir dir)
{
	struct in6_addr addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = ip6h->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
		ip6h->daddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = ip6h->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
		ip6h->saddr = new_addr;
		break;
	default:
		return -1;
	}

	return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
}

static int nf_flow_nat_ipv6(const struct flow_offload *flow,
			    struct sk_buff *skb,
			    enum flow_offload_tuple_dir dir)
{
	struct ipv6hdr *ip6h = ipv6_hdr(skb);
	unsigned int thoff = sizeof(*ip6h);

	if (test_bit(NF_FLOW_SNAT, &flow->flags) &&
	    (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
	     nf_flow_snat_ipv6(flow, skb, ipv6_hdr(skb), thoff, dir) < 0))
		return -1;

	ip6h = ipv6_hdr(skb);
	if (test_bit(NF_FLOW_DNAT, &flow->flags) &&
	    (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
	     nf_flow_dnat_ipv6(flow, skb, ipv6_hdr(skb), thoff, dir) < 0))
		return -1;

	return 0;
}

static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
			      struct flow_offload_tuple *tuple)
{
	struct flow_ports *ports;
	struct ipv6hdr *ip6h;
	unsigned int thoff;

	if (!pskb_may_pull(skb, sizeof(*ip6h)))
		return -1;

	ip6h = ipv6_hdr(skb);

	if (ip6h->nexthdr != IPPROTO_TCP &&
	    ip6h->nexthdr != IPPROTO_UDP)
		return -1;

	if (ip6h->hop_limit <= 1)
		return -1;

	thoff = sizeof(*ip6h);
	if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
		return -1;

	ip6h = ipv6_hdr(skb);
	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);

	tuple->src_v6 = ip6h->saddr;
	tuple->dst_v6 = ip6h->daddr;
	tuple->src_port = ports->source;
	tuple->dst_port = ports->dest;
	tuple->l3proto = AF_INET6;
	tuple->l4proto = ip6h->nexthdr;
	tuple->iifidx = dev->ifindex;

	return 0;
}
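
/* IPv6 fast path: look up the flow from the packet tuple, validate TCP state,
 * MTU and the cached route, apply NAT if needed, decrement the hop limit and
 * transmit via the neighbour layer (or the xfrm output path). Any miss falls
 * back to the regular forwarding path by returning NF_ACCEPT.
 */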
unsigned int
nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
			  const struct nf_hook_state *state)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct nf_flowtable *flow_table = priv;
	struct flow_offload_tuple tuple = {};
	enum flow_offload_tuple_dir dir;
	const struct in6_addr *nexthop;
	struct flow_offload *flow;
	struct net_device *outdev;
	struct ipv6hdr *ip6h;
	struct rt6_info *rt;

	if (skb->protocol != htons(ETH_P_IPV6))
		return NF_ACCEPT;

	if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
		return NF_ACCEPT;

	tuplehash = flow_offload_lookup(flow_table, &tuple);
	if (tuplehash == NULL)
		return NF_ACCEPT;

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
	rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
	outdev = rt->dst.dev;

	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
		return NF_ACCEPT;

	if (nf_flow_state_check(flow, ipv6_hdr(skb)->nexthdr, skb,
				sizeof(*ip6h)))
		return NF_ACCEPT;

	flow_offload_refresh(flow_table, flow);

	if (nf_flow_offload_dst_check(&rt->dst)) {
		flow_offload_teardown(flow);
		return NF_ACCEPT;
	}

	if (skb_try_make_writable(skb, sizeof(*ip6h)))
		return NF_DROP;

	if (nf_flow_nat_ipv6(flow, skb, dir) < 0)
		return NF_DROP;

	ip6h = ipv6_hdr(skb);
	ip6h->hop_limit--;
	skb->tstamp = 0;

	if (flow_table->flags & NF_FLOWTABLE_COUNTER)
		nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);

	if (unlikely(dst_xfrm(&rt->dst))) {
		memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
		IP6CB(skb)->iif = skb->dev->ifindex;
		IP6CB(skb)->flags = IP6SKB_FORWARDED;
		return nf_flow_xmit_xfrm(skb, state, &rt->dst);
	}

	skb->dev = outdev;
	nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
	skb_dst_set_noref(skb, &rt->dst);
	neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);

	return NF_STOLEN;
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);