// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/netdevice.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/neighbour.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack_acct.h>
/* For layer 4 checksum field offset. */
#include <linux/tcp.h>
#include <linux/udp.h>

static int nf_flow_state_check(struct flow_offload *flow, int proto,
			       struct sk_buff *skb, unsigned int thoff)
{
	struct tcphdr *tcph;

	if (proto != IPPROTO_TCP)
		return 0;

	tcph = (void *)(skb_network_header(skb) + thoff);
	if (unlikely(tcph->fin || tcph->rst)) {
		flow_offload_teardown(flow);
		return -1;
	}

	return 0;
}

static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
			      __be32 addr, __be32 new_addr)
{
	struct tcphdr *tcph;

	if (skb_try_make_writable(skb, thoff + sizeof(*tcph)))
		return -1;

	tcph = (void *)(skb_network_header(skb) + thoff);
	inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);

	return 0;
}

static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
			      __be32 addr, __be32 new_addr)
{
	struct udphdr *udph;

	if (skb_try_make_writable(skb, thoff + sizeof(*udph)))
		return -1;

	udph = (void *)(skb_network_header(skb) + thoff);
	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
		inet_proto_csum_replace4(&udph->check, skb, addr,
					 new_addr, true);
		if (!udph->check)
			udph->check = CSUM_MANGLED_0;
	}

	return 0;
}

static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
				  unsigned int thoff, __be32 addr,
				  __be32 new_addr)
{
	switch (iph->protocol) {
	case IPPROTO_TCP:
		if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
			return NF_DROP;
		break;
	case IPPROTO_UDP:
		if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
			return NF_DROP;
		break;
	}

	return 0;
}

static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
			   struct iphdr *iph, unsigned int thoff,
			   enum flow_offload_tuple_dir dir)
{
	__be32 addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = iph->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
		iph->saddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = iph->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
		iph->daddr = new_addr;
		break;
	default:
		return -1;
	}
	csum_replace4(&iph->check, addr, new_addr);

	return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}

static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
			   struct iphdr *iph, unsigned int thoff,
			   enum flow_offload_tuple_dir dir)
{
	__be32 addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = iph->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
		iph->daddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = iph->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
		iph->saddr = new_addr;
		break;
	default:
		return -1;
	}
	csum_replace4(&iph->check, addr, new_addr);

	return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}

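/* Apply the NAT mangling recorded in the flow entry: rewrite the layer 4
 * ports first, then the IPv4 addresses, doing SNAT and/or DNAT depending on
 * the flow flags and the direction this packet is travelling.
 */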
static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
			  unsigned int thoff, enum flow_offload_tuple_dir dir)
{
	struct iphdr *iph = ip_hdr(skb);

	if (test_bit(NF_FLOW_SNAT, &flow->flags) &&
	    (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
	     nf_flow_snat_ip(flow, skb, ip_hdr(skb), thoff, dir) < 0))
		return -1;

	iph = ip_hdr(skb);
	if (test_bit(NF_FLOW_DNAT, &flow->flags) &&
	    (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
	     nf_flow_dnat_ip(flow, skb, ip_hdr(skb), thoff, dir) < 0))
		return -1;

	return 0;
}

static bool ip_has_options(unsigned int thoff)
{
	return thoff != sizeof(struct iphdr);
}

static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
			    struct flow_offload_tuple *tuple)
{
	unsigned int thoff, hdrsize;
	struct flow_ports *ports;
	struct iphdr *iph;

	if (!pskb_may_pull(skb, sizeof(*iph)))
		return -1;

	iph = ip_hdr(skb);
	thoff = iph->ihl * 4;

	if (ip_is_fragment(iph) ||
	    unlikely(ip_has_options(thoff)))
		return -1;

	switch (iph->protocol) {
	case IPPROTO_TCP:
		hdrsize = sizeof(struct tcphdr);
		break;
	case IPPROTO_UDP:
		hdrsize = sizeof(struct udphdr);
		break;
	default:
		return -1;
	}

	if (iph->ttl <= 1)
		return -1;

	thoff = iph->ihl * 4;
	if (!pskb_may_pull(skb, thoff + hdrsize))
		return -1;

	iph = ip_hdr(skb);
	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);

	tuple->src_v4.s_addr = iph->saddr;
	tuple->dst_v4.s_addr = iph->daddr;
	tuple->src_port = ports->source;
	tuple->dst_port = ports->dest;
	tuple->l3proto = AF_INET;
	tuple->l4proto = iph->protocol;
	tuple->iifidx = dev->ifindex;

	return 0;
}

/* Based on ip_exceeds_mtu(). */
static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
		return false;

	return true;
}

static int nf_flow_offload_dst_check(struct dst_entry *dst)
{
	if (unlikely(dst_xfrm(dst)))
		return dst_check(dst, 0) ? 0 : -1;

	return 0;
}

static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
				      const struct nf_hook_state *state,
				      struct dst_entry *dst)
{
	skb_orphan(skb);
	skb_dst_set_noref(skb, dst);
	dst_output(state->net, state->sk, skb);
	return NF_STOLEN;
}

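/* IPv4 fast path: if the packet matches an offloaded flow, mangle it
 * according to the flow entry (NAT, TTL, checksums) and transmit it directly
 * to the cached next hop, bypassing the regular forwarding path. Packets
 * that do not match, exceed the path MTU or carry TCP FIN/RST fall back to
 * the classic path via NF_ACCEPT.
 */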
unsigned int
nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
			const struct nf_hook_state *state)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct nf_flowtable *flow_table = priv;
	struct flow_offload_tuple tuple = {};
	enum flow_offload_tuple_dir dir;
	struct flow_offload *flow;
	struct net_device *outdev;
	struct rtable *rt;
	unsigned int thoff;
	struct iphdr *iph;
	__be32 nexthop;

	if (skb->protocol != htons(ETH_P_IP))
		return NF_ACCEPT;

	if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
		return NF_ACCEPT;

	tuplehash = flow_offload_lookup(flow_table, &tuple);
	if (tuplehash == NULL)
		return NF_ACCEPT;

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
	rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
	outdev = rt->dst.dev;

	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
		return NF_ACCEPT;

	if (skb_try_make_writable(skb, sizeof(*iph)))
		return NF_DROP;

	thoff = ip_hdr(skb)->ihl * 4;
	if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
		return NF_ACCEPT;

	flow_offload_refresh(flow_table, flow);

	if (nf_flow_offload_dst_check(&rt->dst)) {
		flow_offload_teardown(flow);
		return NF_ACCEPT;
	}

	if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
		return NF_DROP;

	iph = ip_hdr(skb);
	ip_decrease_ttl(iph);
	skb->tstamp = 0;

	if (flow_table->flags & NF_FLOWTABLE_COUNTER)
		nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);

	if (unlikely(dst_xfrm(&rt->dst))) {
		memset(skb->cb, 0, sizeof(struct inet_skb_parm));
		IPCB(skb)->iif = skb->dev->ifindex;
		IPCB(skb)->flags = IPSKB_FORWARDED;
		return nf_flow_xmit_xfrm(skb, state, &rt->dst);
	}

	skb->dev = outdev;
	nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
	skb_dst_set_noref(skb, &rt->dst);
	neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);

	return NF_STOLEN;
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);

static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
				struct in6_addr *addr,
				struct in6_addr *new_addr)
{
	struct tcphdr *tcph;

	if (skb_try_make_writable(skb, thoff + sizeof(*tcph)))
		return -1;

	tcph = (void *)(skb_network_header(skb) + thoff);
	inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
				  new_addr->s6_addr32, true);

	return 0;
}

static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
				struct in6_addr *addr,
				struct in6_addr *new_addr)
{
	struct udphdr *udph;

	if (skb_try_make_writable(skb, thoff + sizeof(*udph)))
		return -1;

	udph = (void *)(skb_network_header(skb) + thoff);
	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
		inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
					  new_addr->s6_addr32, true);
		if (!udph->check)
			udph->check = CSUM_MANGLED_0;
	}

	return 0;
}

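/* Fix up the layer 4 checksum after an IPv6 address rewrite; only TCP and
 * UDP flows are ever offloaded, other protocols pass through unchanged.
 */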
static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
				    unsigned int thoff, struct in6_addr *addr,
				    struct in6_addr *new_addr)
{
	switch (ip6h->nexthdr) {
	case IPPROTO_TCP:
		if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
			return NF_DROP;
		break;
	case IPPROTO_UDP:
		if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
			return NF_DROP;
		break;
	}

	return 0;
}

static int nf_flow_snat_ipv6(const struct flow_offload *flow,
			     struct sk_buff *skb, struct ipv6hdr *ip6h,
			     unsigned int thoff,
			     enum flow_offload_tuple_dir dir)
{
	struct in6_addr addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = ip6h->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
		ip6h->saddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = ip6h->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
		ip6h->daddr = new_addr;
		break;
	default:
		return -1;
	}

	return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
}

static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
			     struct sk_buff *skb, struct ipv6hdr *ip6h,
			     unsigned int thoff,
			     enum flow_offload_tuple_dir dir)
{
	struct in6_addr addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = ip6h->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
		ip6h->daddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = ip6h->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
		ip6h->saddr = new_addr;
		break;
	default:
		return -1;
	}

	return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
}

static int nf_flow_nat_ipv6(const struct flow_offload *flow,
			    struct sk_buff *skb,
			    enum flow_offload_tuple_dir dir)
{
	struct ipv6hdr *ip6h = ipv6_hdr(skb);
	unsigned int thoff = sizeof(*ip6h);

	if (test_bit(NF_FLOW_SNAT, &flow->flags) &&
	    (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
	     nf_flow_snat_ipv6(flow, skb, ipv6_hdr(skb), thoff, dir) < 0))
		return -1;

	ip6h = ipv6_hdr(skb);
	if (test_bit(NF_FLOW_DNAT, &flow->flags) &&
	    (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
	     nf_flow_dnat_ipv6(flow, skb, ipv6_hdr(skb), thoff, dir) < 0))
		return -1;

	return 0;
}

static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
			      struct flow_offload_tuple *tuple)
{
	unsigned int thoff, hdrsize;
	struct flow_ports *ports;
	struct ipv6hdr *ip6h;

	if (!pskb_may_pull(skb, sizeof(*ip6h)))
		return -1;

	ip6h = ipv6_hdr(skb);

	switch (ip6h->nexthdr) {
	case IPPROTO_TCP:
		hdrsize = sizeof(struct tcphdr);
		break;
	case IPPROTO_UDP:
		hdrsize = sizeof(struct udphdr);
		break;
	default:
		return -1;
	}

	if (ip6h->hop_limit <= 1)
		return -1;

	thoff = sizeof(*ip6h);
	if (!pskb_may_pull(skb, thoff + hdrsize))
		return -1;

	ip6h = ipv6_hdr(skb);
	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);

	tuple->src_v6 = ip6h->saddr;
	tuple->dst_v6 = ip6h->daddr;
	tuple->src_port = ports->source;
	tuple->dst_port = ports->dest;
	tuple->l3proto = AF_INET6;
	tuple->l4proto = ip6h->nexthdr;
	tuple->iifidx = dev->ifindex;

	return 0;
}

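/* IPv6 counterpart of nf_flow_offload_ip_hook(): match the packet against
 * the flow table, apply NAT if needed, decrement the hop limit and send it
 * straight to the cached next hop.
 */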
unsigned int
nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
			  const struct nf_hook_state *state)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct nf_flowtable *flow_table = priv;
	struct flow_offload_tuple tuple = {};
	enum flow_offload_tuple_dir dir;
	const struct in6_addr *nexthop;
	struct flow_offload *flow;
	struct net_device *outdev;
	struct ipv6hdr *ip6h;
	struct rt6_info *rt;

	if (skb->protocol != htons(ETH_P_IPV6))
		return NF_ACCEPT;

	if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
		return NF_ACCEPT;

	tuplehash = flow_offload_lookup(flow_table, &tuple);
	if (tuplehash == NULL)
		return NF_ACCEPT;

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
	rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
	outdev = rt->dst.dev;

	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
		return NF_ACCEPT;

	if (nf_flow_state_check(flow, ipv6_hdr(skb)->nexthdr, skb,
				sizeof(*ip6h)))
		return NF_ACCEPT;

	flow_offload_refresh(flow_table, flow);

	if (nf_flow_offload_dst_check(&rt->dst)) {
		flow_offload_teardown(flow);
		return NF_ACCEPT;
	}

	if (skb_try_make_writable(skb, sizeof(*ip6h)))
		return NF_DROP;

	if (nf_flow_nat_ipv6(flow, skb, dir) < 0)
		return NF_DROP;

	ip6h = ipv6_hdr(skb);
	ip6h->hop_limit--;
	skb->tstamp = 0;

	if (flow_table->flags & NF_FLOWTABLE_COUNTER)
		nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);

	if (unlikely(dst_xfrm(&rt->dst))) {
		memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
		IP6CB(skb)->iif = skb->dev->ifindex;
		IP6CB(skb)->flags = IP6SKB_FORWARDED;
		return nf_flow_xmit_xfrm(skb, state, &rt->dst);
	}

	skb->dev = outdev;
	nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
	skb_dst_set_noref(skb, &rt->dst);
	neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);

	return NF_STOLEN;
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
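
/*
 * These hooks are not registered here; they are plugged into a flowtable
 * type by the family-specific modules. A rough sketch of that wiring, based
 * on nf_flow_table_ipv4.c (the exact field set may differ between kernel
 * versions):
 *
 *	static struct nf_flowtable_type flowtable_ipv4 = {
 *		.family	= NFPROTO_IPV4,
 *		.init	= nf_flow_table_init,
 *		.free	= nf_flow_table_free,
 *		.hook	= nf_flow_offload_ip_hook,
 *		.owner	= THIS_MODULE,
 *	};
 */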