1 #include <linux/kernel.h> 2 #include <linux/init.h> 3 #include <linux/module.h> 4 #include <linux/netfilter.h> 5 #include <linux/rhashtable.h> 6 #include <linux/netdevice.h> 7 #include <net/ip.h> 8 #include <net/ip6_route.h> 9 #include <net/netfilter/nf_tables.h> 10 #include <net/netfilter/nf_flow_table.h> 11 #include <net/netfilter/nf_conntrack.h> 12 #include <net/netfilter/nf_conntrack_core.h> 13 #include <net/netfilter/nf_conntrack_tuple.h> 14 15 struct flow_offload_entry { 16 struct flow_offload flow; 17 struct nf_conn *ct; 18 struct rcu_head rcu_head; 19 }; 20 21 static DEFINE_MUTEX(flowtable_lock); 22 static LIST_HEAD(flowtables); 23 24 static void 25 flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct, 26 struct nf_flow_route *route, 27 enum flow_offload_tuple_dir dir) 28 { 29 struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple; 30 struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple; 31 struct dst_entry *dst = route->tuple[dir].dst; 32 33 ft->dir = dir; 34 35 switch (ctt->src.l3num) { 36 case NFPROTO_IPV4: 37 ft->src_v4 = ctt->src.u3.in; 38 ft->dst_v4 = ctt->dst.u3.in; 39 ft->mtu = ip_dst_mtu_maybe_forward(dst, true); 40 break; 41 case NFPROTO_IPV6: 42 ft->src_v6 = ctt->src.u3.in6; 43 ft->dst_v6 = ctt->dst.u3.in6; 44 ft->mtu = ip6_dst_mtu_forward(dst); 45 break; 46 } 47 48 ft->l3proto = ctt->src.l3num; 49 ft->l4proto = ctt->dst.protonum; 50 ft->src_port = ctt->src.u.tcp.port; 51 ft->dst_port = ctt->dst.u.tcp.port; 52 53 ft->iifidx = route->tuple[dir].ifindex; 54 ft->oifidx = route->tuple[!dir].ifindex; 55 ft->dst_cache = dst; 56 } 57 58 struct flow_offload * 59 flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route) 60 { 61 struct flow_offload_entry *entry; 62 struct flow_offload *flow; 63 64 if (unlikely(nf_ct_is_dying(ct) || 65 !atomic_inc_not_zero(&ct->ct_general.use))) 66 return NULL; 67 68 entry = kzalloc(sizeof(*entry), GFP_ATOMIC); 69 if (!entry) 70 goto err_ct_refcnt; 71 72 flow = &entry->flow; 73 74 if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst)) 75 goto err_dst_cache_original; 76 77 if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst)) 78 goto err_dst_cache_reply; 79 80 entry->ct = ct; 81 82 flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL); 83 flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY); 84 85 if (ct->status & IPS_SRC_NAT) 86 flow->flags |= FLOW_OFFLOAD_SNAT; 87 if (ct->status & IPS_DST_NAT) 88 flow->flags |= FLOW_OFFLOAD_DNAT; 89 90 return flow; 91 92 err_dst_cache_reply: 93 dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst); 94 err_dst_cache_original: 95 kfree(entry); 96 err_ct_refcnt: 97 nf_ct_put(ct); 98 99 return NULL; 100 } 101 EXPORT_SYMBOL_GPL(flow_offload_alloc); 102 103 static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp) 104 { 105 tcp->state = TCP_CONNTRACK_ESTABLISHED; 106 tcp->seen[0].td_maxwin = 0; 107 tcp->seen[1].td_maxwin = 0; 108 } 109 110 #define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT (120 * HZ) 111 #define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT (30 * HZ) 112 113 static void flow_offload_fixup_ct_state(struct nf_conn *ct) 114 { 115 const struct nf_conntrack_l4proto *l4proto; 116 unsigned int timeout; 117 int l4num; 118 119 l4num = nf_ct_protonum(ct); 120 if (l4num == IPPROTO_TCP) 121 flow_offload_fixup_tcp(&ct->proto.tcp); 122 123 l4proto = __nf_ct_l4proto_find(l4num); 124 if (!l4proto) 125 return; 126 127 if (l4num == IPPROTO_TCP) 128 timeout = NF_FLOWTABLE_TCP_PICKUP_TIMEOUT; 129 else if (l4num == IPPROTO_UDP) 130 timeout = NF_FLOWTABLE_UDP_PICKUP_TIMEOUT; 131 else 132 return; 133 134 ct->timeout = nfct_time_stamp + timeout; 135 } 136 137 void flow_offload_free(struct flow_offload *flow) 138 { 139 struct flow_offload_entry *e; 140 141 dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache); 142 dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache); 143 e = container_of(flow, struct flow_offload_entry, flow); 144 if (flow->flags & FLOW_OFFLOAD_DYING) 145 nf_ct_delete(e->ct, 0, 0); 146 nf_ct_put(e->ct); 147 kfree_rcu(e, rcu_head); 148 } 149 EXPORT_SYMBOL_GPL(flow_offload_free); 150 151 static u32 flow_offload_hash(const void *data, u32 len, u32 seed) 152 { 153 const struct flow_offload_tuple *tuple = data; 154 155 return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed); 156 } 157 158 static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed) 159 { 160 const struct flow_offload_tuple_rhash *tuplehash = data; 161 162 return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed); 163 } 164 165 static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg, 166 const void *ptr) 167 { 168 const struct flow_offload_tuple *tuple = arg->key; 169 const struct flow_offload_tuple_rhash *x = ptr; 170 171 if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir))) 172 return 1; 173 174 return 0; 175 } 176 177 static const struct rhashtable_params nf_flow_offload_rhash_params = { 178 .head_offset = offsetof(struct flow_offload_tuple_rhash, node), 179 .hashfn = flow_offload_hash, 180 .obj_hashfn = flow_offload_hash_obj, 181 .obj_cmpfn = flow_offload_hash_cmp, 182 .automatic_shrinking = true, 183 }; 184 185 int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) 186 { 187 flow->timeout = (u32)jiffies; 188 189 rhashtable_insert_fast(&flow_table->rhashtable, 190 &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node, 191 nf_flow_offload_rhash_params); 192 rhashtable_insert_fast(&flow_table->rhashtable, 193 &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node, 194 nf_flow_offload_rhash_params); 195 return 0; 196 } 197 EXPORT_SYMBOL_GPL(flow_offload_add); 198 199 static void flow_offload_del(struct nf_flowtable *flow_table, 200 struct flow_offload *flow) 201 { 202 struct flow_offload_entry *e; 203 204 rhashtable_remove_fast(&flow_table->rhashtable, 205 &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node, 206 nf_flow_offload_rhash_params); 207 rhashtable_remove_fast(&flow_table->rhashtable, 208 &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node, 209 nf_flow_offload_rhash_params); 210 211 e = container_of(flow, struct flow_offload_entry, flow); 212 clear_bit(IPS_OFFLOAD_BIT, &e->ct->status); 213 214 flow_offload_free(flow); 215 } 216 217 void flow_offload_teardown(struct flow_offload *flow) 218 { 219 struct flow_offload_entry *e; 220 221 flow->flags |= FLOW_OFFLOAD_TEARDOWN; 222 223 e = container_of(flow, struct flow_offload_entry, flow); 224 flow_offload_fixup_ct_state(e->ct); 225 } 226 EXPORT_SYMBOL_GPL(flow_offload_teardown); 227 228 struct flow_offload_tuple_rhash * 229 flow_offload_lookup(struct nf_flowtable *flow_table, 230 struct flow_offload_tuple *tuple) 231 { 232 struct flow_offload_tuple_rhash *tuplehash; 233 struct flow_offload *flow; 234 int dir; 235 236 tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple, 237 nf_flow_offload_rhash_params); 238 if (!tuplehash) 239 return NULL; 240 241 dir = tuplehash->tuple.dir; 242 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); 243 if (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN)) 244 return NULL; 245 246 return tuplehash; 247 } 248 EXPORT_SYMBOL_GPL(flow_offload_lookup); 249 250 int nf_flow_table_iterate(struct nf_flowtable *flow_table, 251 void (*iter)(struct flow_offload *flow, void *data), 252 void *data) 253 { 254 struct flow_offload_tuple_rhash *tuplehash; 255 struct rhashtable_iter hti; 256 struct flow_offload *flow; 257 int err = 0; 258 259 rhashtable_walk_enter(&flow_table->rhashtable, &hti); 260 rhashtable_walk_start(&hti); 261 262 while ((tuplehash = rhashtable_walk_next(&hti))) { 263 if (IS_ERR(tuplehash)) { 264 if (PTR_ERR(tuplehash) != -EAGAIN) { 265 err = PTR_ERR(tuplehash); 266 break; 267 } 268 continue; 269 } 270 if (tuplehash->tuple.dir) 271 continue; 272 273 flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); 274 275 iter(flow, data); 276 } 277 rhashtable_walk_stop(&hti); 278 rhashtable_walk_exit(&hti); 279 280 return err; 281 } 282 EXPORT_SYMBOL_GPL(nf_flow_table_iterate); 283 284 static inline bool nf_flow_has_expired(const struct flow_offload *flow) 285 { 286 return (__s32)(flow->timeout - (u32)jiffies) <= 0; 287 } 288 289 static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table) 290 { 291 struct flow_offload_tuple_rhash *tuplehash; 292 struct rhashtable_iter hti; 293 struct flow_offload *flow; 294 295 rhashtable_walk_enter(&flow_table->rhashtable, &hti); 296 rhashtable_walk_start(&hti); 297 298 while ((tuplehash = rhashtable_walk_next(&hti))) { 299 if (IS_ERR(tuplehash)) { 300 if (PTR_ERR(tuplehash) != -EAGAIN) 301 break; 302 continue; 303 } 304 if (tuplehash->tuple.dir) 305 continue; 306 307 flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); 308 309 if (nf_flow_has_expired(flow) || 310 (flow->flags & (FLOW_OFFLOAD_DYING | 311 FLOW_OFFLOAD_TEARDOWN))) 312 flow_offload_del(flow_table, flow); 313 } 314 rhashtable_walk_stop(&hti); 315 rhashtable_walk_exit(&hti); 316 } 317 318 static void nf_flow_offload_work_gc(struct work_struct *work) 319 { 320 struct nf_flowtable *flow_table; 321 322 flow_table = container_of(work, struct nf_flowtable, gc_work.work); 323 nf_flow_offload_gc_step(flow_table); 324 queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ); 325 } 326 327 static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff, 328 __be16 port, __be16 new_port) 329 { 330 struct tcphdr *tcph; 331 332 if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) || 333 skb_try_make_writable(skb, thoff + sizeof(*tcph))) 334 return -1; 335 336 tcph = (void *)(skb_network_header(skb) + thoff); 337 inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true); 338 339 return 0; 340 } 341 342 static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff, 343 __be16 port, __be16 new_port) 344 { 345 struct udphdr *udph; 346 347 if (!pskb_may_pull(skb, thoff + sizeof(*udph)) || 348 skb_try_make_writable(skb, thoff + sizeof(*udph))) 349 return -1; 350 351 udph = (void *)(skb_network_header(skb) + thoff); 352 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { 353 inet_proto_csum_replace2(&udph->check, skb, port, 354 new_port, true); 355 if (!udph->check) 356 udph->check = CSUM_MANGLED_0; 357 } 358 359 return 0; 360 } 361 362 static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff, 363 u8 protocol, __be16 port, __be16 new_port) 364 { 365 switch (protocol) { 366 case IPPROTO_TCP: 367 if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0) 368 return NF_DROP; 369 break; 370 case IPPROTO_UDP: 371 if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0) 372 return NF_DROP; 373 break; 374 } 375 376 return 0; 377 } 378 379 int nf_flow_snat_port(const struct flow_offload *flow, 380 struct sk_buff *skb, unsigned int thoff, 381 u8 protocol, enum flow_offload_tuple_dir dir) 382 { 383 struct flow_ports *hdr; 384 __be16 port, new_port; 385 386 if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) || 387 skb_try_make_writable(skb, thoff + sizeof(*hdr))) 388 return -1; 389 390 hdr = (void *)(skb_network_header(skb) + thoff); 391 392 switch (dir) { 393 case FLOW_OFFLOAD_DIR_ORIGINAL: 394 port = hdr->source; 395 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port; 396 hdr->source = new_port; 397 break; 398 case FLOW_OFFLOAD_DIR_REPLY: 399 port = hdr->dest; 400 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port; 401 hdr->dest = new_port; 402 break; 403 default: 404 return -1; 405 } 406 407 return nf_flow_nat_port(skb, thoff, protocol, port, new_port); 408 } 409 EXPORT_SYMBOL_GPL(nf_flow_snat_port); 410 411 int nf_flow_dnat_port(const struct flow_offload *flow, 412 struct sk_buff *skb, unsigned int thoff, 413 u8 protocol, enum flow_offload_tuple_dir dir) 414 { 415 struct flow_ports *hdr; 416 __be16 port, new_port; 417 418 if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) || 419 skb_try_make_writable(skb, thoff + sizeof(*hdr))) 420 return -1; 421 422 hdr = (void *)(skb_network_header(skb) + thoff); 423 424 switch (dir) { 425 case FLOW_OFFLOAD_DIR_ORIGINAL: 426 port = hdr->dest; 427 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port; 428 hdr->dest = new_port; 429 break; 430 case FLOW_OFFLOAD_DIR_REPLY: 431 port = hdr->source; 432 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port; 433 hdr->source = new_port; 434 break; 435 default: 436 return -1; 437 } 438 439 return nf_flow_nat_port(skb, thoff, protocol, port, new_port); 440 } 441 EXPORT_SYMBOL_GPL(nf_flow_dnat_port); 442 443 int nf_flow_table_init(struct nf_flowtable *flowtable) 444 { 445 int err; 446 447 INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc); 448 449 err = rhashtable_init(&flowtable->rhashtable, 450 &nf_flow_offload_rhash_params); 451 if (err < 0) 452 return err; 453 454 queue_delayed_work(system_power_efficient_wq, 455 &flowtable->gc_work, HZ); 456 457 mutex_lock(&flowtable_lock); 458 list_add(&flowtable->list, &flowtables); 459 mutex_unlock(&flowtable_lock); 460 461 return 0; 462 } 463 EXPORT_SYMBOL_GPL(nf_flow_table_init); 464 465 static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data) 466 { 467 struct net_device *dev = data; 468 struct flow_offload_entry *e; 469 470 e = container_of(flow, struct flow_offload_entry, flow); 471 472 if (!dev) { 473 flow_offload_teardown(flow); 474 return; 475 } 476 if (net_eq(nf_ct_net(e->ct), dev_net(dev)) && 477 (flow->tuplehash[0].tuple.iifidx == dev->ifindex || 478 flow->tuplehash[1].tuple.iifidx == dev->ifindex)) 479 flow_offload_dead(flow); 480 } 481 482 static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable, 483 struct net_device *dev) 484 { 485 nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev); 486 flush_delayed_work(&flowtable->gc_work); 487 } 488 489 void nf_flow_table_cleanup(struct net_device *dev) 490 { 491 struct nf_flowtable *flowtable; 492 493 mutex_lock(&flowtable_lock); 494 list_for_each_entry(flowtable, &flowtables, list) 495 nf_flow_table_iterate_cleanup(flowtable, dev); 496 mutex_unlock(&flowtable_lock); 497 } 498 EXPORT_SYMBOL_GPL(nf_flow_table_cleanup); 499 500 void nf_flow_table_free(struct nf_flowtable *flow_table) 501 { 502 mutex_lock(&flowtable_lock); 503 list_del(&flow_table->list); 504 mutex_unlock(&flowtable_lock); 505 cancel_delayed_work_sync(&flow_table->gc_work); 506 nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); 507 nf_flow_offload_gc_step(flow_table); 508 rhashtable_destroy(&flow_table->rhashtable); 509 } 510 EXPORT_SYMBOL_GPL(nf_flow_table_free); 511 512 MODULE_LICENSE("GPL"); 513 MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); 514