#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/netdevice.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/neighbour.h>
#include <net/netfilter/nf_flow_table.h>
/* For layer 4 checksum field offset. */
#include <linux/tcp.h>
#include <linux/udp.h>

/* Tear down the flow when a TCP FIN or RST is seen, so the connection
 * falls back to the standard (non-offloaded) forwarding path.
 */
static int nf_flow_state_check(struct flow_offload *flow, int proto,
                               struct sk_buff *skb, unsigned int thoff)
{
        struct tcphdr *tcph;

        if (proto != IPPROTO_TCP)
                return 0;

        if (!pskb_may_pull(skb, thoff + sizeof(*tcph)))
                return -1;

        tcph = (void *)(skb_network_header(skb) + thoff);
        if (unlikely(tcph->fin || tcph->rst)) {
                flow_offload_teardown(flow);
                return -1;
        }

        return 0;
}

static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
                              __be32 addr, __be32 new_addr)
{
        struct tcphdr *tcph;

        if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
            skb_try_make_writable(skb, thoff + sizeof(*tcph)))
                return -1;

        tcph = (void *)(skb_network_header(skb) + thoff);
        inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);

        return 0;
}

static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
                              __be32 addr, __be32 new_addr)
{
        struct udphdr *udph;

        if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
            skb_try_make_writable(skb, thoff + sizeof(*udph)))
                return -1;

        udph = (void *)(skb_network_header(skb) + thoff);
        if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
                inet_proto_csum_replace4(&udph->check, skb, addr,
                                         new_addr, true);
                if (!udph->check)
                        udph->check = CSUM_MANGLED_0;
        }

        return 0;
}

static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
                                  unsigned int thoff, __be32 addr,
                                  __be32 new_addr)
{
        switch (iph->protocol) {
        case IPPROTO_TCP:
                if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
                        return NF_DROP;
                break;
        case IPPROTO_UDP:
                if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
                        return NF_DROP;
                break;
        }

        return 0;
}

static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
                           struct iphdr *iph, unsigned int thoff,
                           enum flow_offload_tuple_dir dir)
{
        __be32 addr, new_addr;

        switch (dir) {
        case FLOW_OFFLOAD_DIR_ORIGINAL:
                addr = iph->saddr;
                new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
                iph->saddr = new_addr;
                break;
        case FLOW_OFFLOAD_DIR_REPLY:
                addr = iph->daddr;
                new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
                iph->daddr = new_addr;
                break;
        default:
                return -1;
        }
        csum_replace4(&iph->check, addr, new_addr);

        return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}

static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
                           struct iphdr *iph, unsigned int thoff,
                           enum flow_offload_tuple_dir dir)
{
        __be32 addr, new_addr;

        switch (dir) {
        case FLOW_OFFLOAD_DIR_ORIGINAL:
                addr = iph->daddr;
                new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
                iph->daddr = new_addr;
                break;
        case FLOW_OFFLOAD_DIR_REPLY:
                addr = iph->saddr;
                new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
                iph->saddr = new_addr;
                break;
        default:
                return -1;
        }
        csum_replace4(&iph->check, addr, new_addr);

        return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}

static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
                          unsigned int thoff, enum flow_offload_tuple_dir dir)
{
        struct iphdr *iph = ip_hdr(skb);

        if (flow->flags & FLOW_OFFLOAD_SNAT &&
            (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
             nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
                return -1;
        if (flow->flags & FLOW_OFFLOAD_DNAT &&
            (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
             nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
                return -1;

        return 0;
}

static bool ip_has_options(unsigned int thoff)
{
        return thoff != sizeof(struct iphdr);
}

static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
                            struct flow_offload_tuple *tuple)
{
        struct flow_ports *ports;
        unsigned int thoff;
        struct iphdr *iph;

        if (!pskb_may_pull(skb, sizeof(*iph)))
                return -1;

        iph = ip_hdr(skb);
        thoff = iph->ihl * 4;

        if (ip_is_fragment(iph) ||
            unlikely(ip_has_options(thoff)))
                return -1;

        if (iph->protocol != IPPROTO_TCP &&
            iph->protocol != IPPROTO_UDP)
                return -1;

        thoff = iph->ihl * 4;
        if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
                return -1;

        ports = (struct flow_ports *)(skb_network_header(skb) + thoff);

        tuple->src_v4.s_addr = iph->saddr;
        tuple->dst_v4.s_addr = iph->daddr;
        tuple->src_port = ports->source;
        tuple->dst_port = ports->dest;
        tuple->l3proto = AF_INET;
        tuple->l4proto = iph->protocol;
        tuple->iifidx = dev->ifindex;

        return 0;
}

/* Based on ip_exceeds_mtu(). */
static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
{
        if (skb->len <= mtu)
                return false;

        if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
                return false;

        return true;
}

/* IPv4 fast path: look up the packet in the flow table and, on a hit,
 * perform any required NAT, decrement the TTL and transmit it straight
 * to the cached next hop, bypassing the regular forwarding path.
 */
unsigned int
nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
                        const struct nf_hook_state *state)
{
        struct flow_offload_tuple_rhash *tuplehash;
        struct nf_flowtable *flow_table = priv;
        struct flow_offload_tuple tuple = {};
        enum flow_offload_tuple_dir dir;
        struct flow_offload *flow;
        struct net_device *outdev;
        struct rtable *rt;
        unsigned int thoff;
        struct iphdr *iph;
        __be32 nexthop;

        if (skb->protocol != htons(ETH_P_IP))
                return NF_ACCEPT;

        if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
                return NF_ACCEPT;

        tuplehash = flow_offload_lookup(flow_table, &tuple);
        if (tuplehash == NULL)
                return NF_ACCEPT;

        outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
        if (!outdev)
                return NF_ACCEPT;

        dir = tuplehash->tuple.dir;
        flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
        rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;

        if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)) &&
            (ip_hdr(skb)->frag_off & htons(IP_DF)) != 0)
                return NF_ACCEPT;

        if (skb_try_make_writable(skb, sizeof(*iph)))
                return NF_DROP;

        thoff = ip_hdr(skb)->ihl * 4;
        if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
                return NF_ACCEPT;

        if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
                return NF_DROP;

        flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
        iph = ip_hdr(skb);
        ip_decrease_ttl(iph);

        skb->dev = outdev;
        nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
        skb_dst_set_noref(skb, &rt->dst);
        neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);

        return NF_STOLEN;
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);

static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
                                struct in6_addr *addr,
                                struct in6_addr *new_addr)
{
        struct tcphdr *tcph;

        if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
            skb_try_make_writable(skb, thoff + sizeof(*tcph)))
                return -1;

        tcph = (void *)(skb_network_header(skb) + thoff);
        inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
                                  new_addr->s6_addr32, true);

        return 0;
}

static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
                                struct in6_addr *addr,
                                struct in6_addr *new_addr)
{
        struct udphdr *udph;

        if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
            skb_try_make_writable(skb, thoff + sizeof(*udph)))
                return -1;

        udph = (void *)(skb_network_header(skb) + thoff);
        if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
                inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
                                          new_addr->s6_addr32, true);
                if (!udph->check)
                        udph->check = CSUM_MANGLED_0;
        }

        return 0;
}

static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
                                    unsigned int thoff, struct in6_addr *addr,
                                    struct in6_addr *new_addr)
{
        switch (ip6h->nexthdr) {
        case IPPROTO_TCP:
                if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
                        return NF_DROP;
                break;
        case IPPROTO_UDP:
                if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
                        return NF_DROP;
                break;
        }

        return 0;
}

static int nf_flow_snat_ipv6(const struct flow_offload *flow,
                             struct sk_buff *skb, struct ipv6hdr *ip6h,
                             unsigned int thoff,
                             enum flow_offload_tuple_dir dir)
{
        struct in6_addr addr, new_addr;

        switch (dir) {
        case FLOW_OFFLOAD_DIR_ORIGINAL:
                addr = ip6h->saddr;
                new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
                ip6h->saddr = new_addr;
                break;
        case FLOW_OFFLOAD_DIR_REPLY:
                addr = ip6h->daddr;
                new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
                ip6h->daddr = new_addr;
                break;
        default:
                return -1;
        }

        return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
}

static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
                             struct sk_buff *skb, struct ipv6hdr *ip6h,
                             unsigned int thoff,
                             enum flow_offload_tuple_dir dir)
{
        struct in6_addr addr, new_addr;

        switch (dir) {
        case FLOW_OFFLOAD_DIR_ORIGINAL:
                addr = ip6h->daddr;
                new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
                ip6h->daddr = new_addr;
                break;
        case FLOW_OFFLOAD_DIR_REPLY:
                addr = ip6h->saddr;
                new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
                ip6h->saddr = new_addr;
                break;
        default:
                return -1;
        }

        return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
}

static int nf_flow_nat_ipv6(const struct flow_offload *flow,
                            struct sk_buff *skb,
                            enum flow_offload_tuple_dir dir)
{
        struct ipv6hdr *ip6h = ipv6_hdr(skb);
        unsigned int thoff = sizeof(*ip6h);

        if (flow->flags & FLOW_OFFLOAD_SNAT &&
            (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
             nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
                return -1;
        if (flow->flags & FLOW_OFFLOAD_DNAT &&
            (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
             nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
                return -1;

        return 0;
}

static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
                              struct flow_offload_tuple *tuple)
{
        struct flow_ports *ports;
        struct ipv6hdr *ip6h;
        unsigned int thoff;

        if (!pskb_may_pull(skb, sizeof(*ip6h)))
                return -1;

        ip6h = ipv6_hdr(skb);

        if (ip6h->nexthdr != IPPROTO_TCP &&
            ip6h->nexthdr != IPPROTO_UDP)
                return -1;

        thoff = sizeof(*ip6h);
        if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
                return -1;

        ports = (struct flow_ports *)(skb_network_header(skb) + thoff);

        tuple->src_v6 = ip6h->saddr;
        tuple->dst_v6 = ip6h->daddr;
        tuple->src_port = ports->source;
        tuple->dst_port = ports->dest;
        tuple->l3proto = AF_INET6;
        tuple->l4proto = ip6h->nexthdr;
        tuple->iifidx = dev->ifindex;

        return 0;
}

/* IPv6 fast path: same as the IPv4 hook above, but using the IPv6 header,
 * the hop limit instead of the TTL, and neighbour discovery instead of ARP
 * for the next hop.
 */
unsigned int
nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
                          const struct nf_hook_state *state)
{
        struct flow_offload_tuple_rhash *tuplehash;
        struct nf_flowtable *flow_table = priv;
        struct flow_offload_tuple tuple = {};
        enum flow_offload_tuple_dir dir;
        struct flow_offload *flow;
        struct net_device *outdev;
        struct in6_addr *nexthop;
        struct ipv6hdr *ip6h;
        struct rt6_info *rt;

        if (skb->protocol != htons(ETH_P_IPV6))
                return NF_ACCEPT;

        if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
                return NF_ACCEPT;

        tuplehash = flow_offload_lookup(flow_table, &tuple);
        if (tuplehash == NULL)
                return NF_ACCEPT;

        outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
        if (!outdev)
                return NF_ACCEPT;

        dir = tuplehash->tuple.dir;
        flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
        rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;

        if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
                return NF_ACCEPT;

        if (nf_flow_state_check(flow, ipv6_hdr(skb)->nexthdr, skb,
                                sizeof(*ip6h)))
                return NF_ACCEPT;

        if (skb_try_make_writable(skb, sizeof(*ip6h)))
                return NF_DROP;

        if (nf_flow_nat_ipv6(flow, skb, dir) < 0)
                return NF_DROP;

        flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
        ip6h = ipv6_hdr(skb);
        ip6h->hop_limit--;

        skb->dev = outdev;
        nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
        skb_dst_set_noref(skb, &rt->dst);
        neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);

        return NF_STOLEN;
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
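
/* Illustrative only, not part of this file: a minimal sketch of how the
 * exported nf_flow_offload_ip_hook() is typically wired up as a flowtable
 * type from a separate per-family module (e.g. nf_flow_table_ipv4.c).
 * The struct fields and nft_register_flowtable_type() call below are
 * assumptions based on the nf_flowtable_type API of the same kernel
 * generation; check the tree you are building against.  Guarded with
 * "#if 0" so it never compiles as part of this file.
 */
#if 0
static struct nf_flowtable_type flowtable_ipv4 = {
        .family = NFPROTO_IPV4,
        .init   = nf_flow_table_init,
        .free   = nf_flow_table_free,
        .hook   = nf_flow_offload_ip_hook,
        .owner  = THIS_MODULE,
};

static int __init nf_flow_ipv4_module_init(void)
{
        /* Make this flowtable type selectable from nftables rulesets. */
        nft_register_flowtable_type(&flowtable_ipv4);

        return 0;
}

static void __exit nf_flow_ipv4_module_exit(void)
{
        nft_unregister_flowtable_type(&flowtable_ipv4);
}

module_init(nf_flow_ipv4_module_init);
module_exit(nf_flow_ipv4_module_exit);

MODULE_LICENSE("GPL");
#endif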