#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
#include <linux/tc_act/tc_csum.h>
#include <net/flow_offload.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_tuple.h>

static struct workqueue_struct *nf_flow_offload_wq;

struct flow_offload_work {
	struct list_head list;
	enum flow_cls_command cmd;
	int priority;
	struct nf_flowtable *flowtable;
	struct flow_offload *flow;
	struct work_struct work;
};

#define NF_FLOW_DISSECTOR(__match, __type, __field)	\
	(__match)->dissector.offset[__type] =		\
		offsetof(struct nf_flow_key, __field)

static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
				   struct ip_tunnel_info *tun_info)
{
	struct nf_flow_key *mask = &match->mask;
	struct nf_flow_key *key = &match->key;
	unsigned int enc_keys;

	if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX))
		return;

	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
	key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id);
	mask->enc_key_id.keyid = 0xffffffff;
	enc_keys = BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
		   BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL);

	if (ip_tunnel_info_af(tun_info) == AF_INET) {
		NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
				  enc_ipv4);
		key->enc_ipv4.src = tun_info->key.u.ipv4.dst;
		key->enc_ipv4.dst = tun_info->key.u.ipv4.src;
		if (key->enc_ipv4.src)
			mask->enc_ipv4.src = 0xffffffff;
		if (key->enc_ipv4.dst)
			mask->enc_ipv4.dst = 0xffffffff;
		enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
	} else {
		memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst,
		       sizeof(struct in6_addr));
		memcpy(&key->enc_ipv6.dst, &tun_info->key.u.ipv6.src,
		       sizeof(struct in6_addr));
		if (memcmp(&key->enc_ipv6.src, &in6addr_any,
			   sizeof(struct in6_addr)))
			memset(&key->enc_ipv6.src, 0xff,
			       sizeof(struct in6_addr));
		if (memcmp(&key->enc_ipv6.dst, &in6addr_any,
			   sizeof(struct in6_addr)))
			memset(&key->enc_ipv6.dst, 0xff,
			       sizeof(struct in6_addr));
		enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
	}

	match->dissector.used_keys |= enc_keys;
}

static int nf_flow_rule_match(struct nf_flow_match *match,
			      const struct flow_offload_tuple *tuple,
			      struct dst_entry *other_dst)
{
	struct nf_flow_key *mask = &match->mask;
	struct nf_flow_key *key = &match->key;
	struct ip_tunnel_info *tun_info;

	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);

	if (other_dst && other_dst->lwtstate) {
		tun_info = lwt_tun_info(other_dst->lwtstate);
		nf_flow_rule_lwt_match(match, tun_info);
	}

	key->meta.ingress_ifindex = tuple->iifidx;
	mask->meta.ingress_ifindex = 0xffffffff;

	switch (tuple->l3proto) {
	case AF_INET:
		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
		key->basic.n_proto = htons(ETH_P_IP);
		key->ipv4.src = tuple->src_v4.s_addr;
		mask->ipv4.src = 0xffffffff;
		key->ipv4.dst = tuple->dst_v4.s_addr;
		mask->ipv4.dst = 0xffffffff;
		break;
	case AF_INET6:
		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
		key->basic.n_proto = htons(ETH_P_IPV6);
		key->ipv6.src = tuple->src_v6;
		memset(&mask->ipv6.src, 0xff, sizeof(mask->ipv6.src));
		key->ipv6.dst = tuple->dst_v6;
		memset(&mask->ipv6.dst, 0xff, sizeof(mask->ipv6.dst));
		break;
	default:
		return -EOPNOTSUPP;
	}
	mask->control.addr_type = 0xffff;
	match->dissector.used_keys |= BIT(key->control.addr_type);
	mask->basic.n_proto = 0xffff;

	switch (tuple->l4proto) {
	case IPPROTO_TCP:
		key->tcp.flags = 0;
		mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16);
		match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
		break;
	case IPPROTO_UDP:
		break;
	default:
		return -EOPNOTSUPP;
	}

	key->basic.ip_proto = tuple->l4proto;
	mask->basic.ip_proto = 0xff;

	key->tp.src = tuple->src_port;
	mask->tp.src = 0xffff;
	key->tp.dst = tuple->dst_port;
	mask->tp.dst = 0xffff;

	match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
				      BIT(FLOW_DISSECTOR_KEY_CONTROL) |
				      BIT(FLOW_DISSECTOR_KEY_BASIC) |
				      BIT(FLOW_DISSECTOR_KEY_PORTS);
	return 0;
}

static void flow_offload_mangle(struct flow_action_entry *entry,
				enum flow_action_mangle_base htype, u32 offset,
				const __be32 *value, const __be32 *mask)
{
	entry->id = FLOW_ACTION_MANGLE;
	entry->mangle.htype = htype;
	entry->mangle.offset = offset;
	memcpy(&entry->mangle.mask, mask, sizeof(u32));
	memcpy(&entry->mangle.val, value, sizeof(u32));
}

static inline struct flow_action_entry *
flow_action_entry_next(struct nf_flow_rule *flow_rule)
{
	int i = flow_rule->rule->action.num_entries++;

	return &flow_rule->rule->action.entries[i];
}

static int flow_offload_eth_src(struct net *net,
				const struct flow_offload *flow,
				enum flow_offload_tuple_dir dir,
				struct nf_flow_rule *flow_rule)
{
	const struct flow_offload_tuple *tuple = &flow->tuplehash[!dir].tuple;
	struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
	struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
	struct net_device *dev;
	u32 mask, val;
	u16 val16;

	dev = dev_get_by_index(net, tuple->iifidx);
	if (!dev)
		return -ENOENT;

	mask = ~0xffff0000;
	memcpy(&val16, dev->dev_addr, 2);
	val = val16 << 16;
	flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
			    &val, &mask);

	mask = ~0xffffffff;
	memcpy(&val, dev->dev_addr + 2, 4);
	flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
			    &val, &mask);
	dev_put(dev);

	return 0;
}

static int flow_offload_eth_dst(struct net *net,
				const struct flow_offload *flow,
				enum flow_offload_tuple_dir dir,
				struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
	struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
	const void *daddr = &flow->tuplehash[!dir].tuple.src_v4;
	const struct dst_entry *dst_cache;
	unsigned char ha[ETH_ALEN];
	struct neighbour *n;
	u32 mask, val;
	u8 nud_state;
	u16 val16;

	dst_cache = flow->tuplehash[dir].tuple.dst_cache;
	n = dst_neigh_lookup(dst_cache, daddr);
	if (!n)
		return -ENOENT;

	read_lock_bh(&n->lock);
	nud_state = n->nud_state;
	ether_addr_copy(ha, n->ha);
	read_unlock_bh(&n->lock);

	if (!(nud_state & NUD_VALID)) {
		neigh_release(n);
		return -ENOENT;
	}

	mask = ~0xffffffff;
	memcpy(&val, ha, 4);
	flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0,
			    &val, &mask);

	mask = ~0x0000ffff;
	memcpy(&val16, ha + 4, 2);
	val = val16;
	flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
			    &val, &mask);
	neigh_release(n);

	return 0;
}

static void flow_offload_ipv4_snat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask = ~htonl(0xffffffff);
	__be32 addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
		offset = offsetof(struct iphdr, saddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
		offset = offsetof(struct iphdr, daddr);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
			    &addr, &mask);
}

static void flow_offload_ipv4_dnat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask = ~htonl(0xffffffff);
	__be32 addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
		offset = offsetof(struct iphdr, daddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
		offset = offsetof(struct iphdr, saddr);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
			    &addr, &mask);
}

static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
				     unsigned int offset,
				     const __be32 *addr, const __be32 *mask)
{
	struct flow_action_entry *entry;
	int i, j;

	/* Emit one 32-bit mangle action per word of the IPv6 address:
	 * i is the byte offset into the header, j indexes the address words.
	 */
	for (i = 0, j = 0; i < sizeof(struct in6_addr); i += sizeof(u32), j++) {
		entry = flow_action_entry_next(flow_rule);
		flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
				    offset + i, &addr[j], mask);
	}
}

static void flow_offload_ipv6_snat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	u32 mask = ~htonl(0xffffffff);
	const __be32 *addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, saddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, daddr);
		break;
	default:
		return;
	}

	flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
}

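/* For DNAT, packets in the original direction get their destination address
 * rewritten to the reply tuple's source address; packets in the reply
 * direction get their source address rewritten back to the original tuple's
 * destination address.
 */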
static void flow_offload_ipv6_dnat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	u32 mask = ~htonl(0xffffffff);
	const __be32 *addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, daddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, saddr);
		break;
	default:
		return;
	}

	flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
}

static int flow_offload_l4proto(const struct flow_offload *flow)
{
	u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
	u8 type = 0;

	switch (protonum) {
	case IPPROTO_TCP:
		type = FLOW_ACT_MANGLE_HDR_TYPE_TCP;
		break;
	case IPPROTO_UDP:
		type = FLOW_ACT_MANGLE_HDR_TYPE_UDP;
		break;
	default:
		break;
	}

	return type;
}

static void flow_offload_port_snat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask, port;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port);
		offset = 0; /* offsetof(struct tcphdr, source); */
		port = htonl(port << 16);
		mask = ~htonl(0xffff0000);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port);
		offset = 0; /* offsetof(struct tcphdr, dest); */
		port = htonl(port);
		mask = ~htonl(0xffff);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
			    &port, &mask);
}

static void flow_offload_port_dnat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask, port;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port);
		offset = 0; /* offsetof(struct tcphdr, dest); */
		port = htonl(port);
		mask = ~htonl(0xffff);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port);
		offset = 0; /* offsetof(struct tcphdr, source); */
		port = htonl(port << 16);
		mask = ~htonl(0xffff0000);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
			    &port, &mask);
}

static void flow_offload_ipv4_checksum(struct net *net,
				       const struct flow_offload *flow,
				       struct nf_flow_rule *flow_rule)
{
	u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);

	entry->id = FLOW_ACTION_CSUM;
	entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR;

	switch (protonum) {
	case IPPROTO_TCP:
		entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_TCP;
		break;
	case IPPROTO_UDP:
		entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
		break;
	}
}

static void flow_offload_redirect(const struct flow_offload *flow,
				  enum flow_offload_tuple_dir dir,
				  struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	struct rtable *rt;

	rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
	entry->id = FLOW_ACTION_REDIRECT;
	entry->dev = rt->dst.dev;
	dev_hold(rt->dst.dev);
}

static void flow_offload_encap_tunnel(const struct flow_offload *flow,
				      enum flow_offload_tuple_dir dir,
				      struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry;
	struct dst_entry *dst;

	dst = flow->tuplehash[dir].tuple.dst_cache;
	if (dst && dst->lwtstate) {
		struct ip_tunnel_info *tun_info;

		tun_info = lwt_tun_info(dst->lwtstate);
		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
			entry = flow_action_entry_next(flow_rule);
			entry->id = FLOW_ACTION_TUNNEL_ENCAP;
			entry->tunnel = tun_info;
		}
	}
}

static void flow_offload_decap_tunnel(const struct flow_offload *flow,
				      enum flow_offload_tuple_dir dir,
				      struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry;
	struct dst_entry *dst;

	dst = flow->tuplehash[!dir].tuple.dst_cache;
	if (dst && dst->lwtstate) {
		struct ip_tunnel_info *tun_info;

		tun_info = lwt_tun_info(dst->lwtstate);
		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
			entry = flow_action_entry_next(flow_rule);
			entry->id = FLOW_ACTION_TUNNEL_DECAP;
		}
	}
}

int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
			    enum flow_offload_tuple_dir dir,
			    struct nf_flow_rule *flow_rule)
{
	flow_offload_decap_tunnel(flow, dir, flow_rule);
	flow_offload_encap_tunnel(flow, dir, flow_rule);

	if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
	    flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
		return -1;

	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
		flow_offload_ipv4_snat(net, flow, dir, flow_rule);
		flow_offload_port_snat(net, flow, dir, flow_rule);
	}
	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
		flow_offload_ipv4_dnat(net, flow, dir, flow_rule);
		flow_offload_port_dnat(net, flow, dir, flow_rule);
	}
	if (test_bit(NF_FLOW_SNAT, &flow->flags) ||
	    test_bit(NF_FLOW_DNAT, &flow->flags))
		flow_offload_ipv4_checksum(net, flow, flow_rule);

	flow_offload_redirect(flow, dir, flow_rule);

	return 0;
}
EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);

int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
			    enum flow_offload_tuple_dir dir,
			    struct nf_flow_rule *flow_rule)
{
	flow_offload_decap_tunnel(flow, dir, flow_rule);
	flow_offload_encap_tunnel(flow, dir, flow_rule);

	if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
	    flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
		return -1;

	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
		flow_offload_ipv6_snat(net, flow, dir, flow_rule);
		flow_offload_port_snat(net, flow, dir, flow_rule);
	}
	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
		flow_offload_ipv6_dnat(net, flow, dir, flow_rule);
		flow_offload_port_dnat(net, flow, dir, flow_rule);
	}

	flow_offload_redirect(flow, dir, flow_rule);

	return 0;
}
EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6);

#define NF_FLOW_RULE_ACTION_MAX	16

static struct nf_flow_rule *
nf_flow_offload_rule_alloc(struct net *net,
			   const struct flow_offload_work *offload,
			   enum flow_offload_tuple_dir dir)
{
	const struct nf_flowtable *flowtable = offload->flowtable;
	const struct flow_offload *flow = offload->flow;
	const struct flow_offload_tuple *tuple;
	struct nf_flow_rule *flow_rule;
	struct dst_entry *other_dst;
	int err = -ENOMEM;

	flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
	if (!flow_rule)
		goto err_flow;

	flow_rule->rule = flow_rule_alloc(NF_FLOW_RULE_ACTION_MAX);
	if (!flow_rule->rule)
		goto err_flow_rule;

	flow_rule->rule->match.dissector = &flow_rule->match.dissector;
	flow_rule->rule->match.mask = &flow_rule->match.mask;
	flow_rule->rule->match.key = &flow_rule->match.key;

	tuple = &flow->tuplehash[dir].tuple;
	other_dst = flow->tuplehash[!dir].tuple.dst_cache;
	err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst);
	if (err < 0)
		goto err_flow_match;

	flow_rule->rule->action.num_entries = 0;
	if (flowtable->type->action(net, flow, dir, flow_rule) < 0)
		goto err_flow_match;

	return flow_rule;

err_flow_match:
	kfree(flow_rule->rule);
err_flow_rule:
	kfree(flow_rule);
err_flow:
	return NULL;
}

static void __nf_flow_offload_destroy(struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry;
	int i;

	for (i = 0; i < flow_rule->rule->action.num_entries; i++) {
		entry = &flow_rule->rule->action.entries[i];
		if (entry->id != FLOW_ACTION_REDIRECT)
			continue;

		dev_put(entry->dev);
	}
	kfree(flow_rule->rule);
	kfree(flow_rule);
}

static void nf_flow_offload_destroy(struct nf_flow_rule *flow_rule[])
{
	int i;

	for (i = 0; i < FLOW_OFFLOAD_DIR_MAX; i++)
		__nf_flow_offload_destroy(flow_rule[i]);
}

static int nf_flow_offload_alloc(const struct flow_offload_work *offload,
				 struct nf_flow_rule *flow_rule[])
{
	struct net *net = read_pnet(&offload->flowtable->net);

	flow_rule[0] = nf_flow_offload_rule_alloc(net, offload,
						  FLOW_OFFLOAD_DIR_ORIGINAL);
	if (!flow_rule[0])
		return -ENOMEM;

	flow_rule[1] = nf_flow_offload_rule_alloc(net, offload,
						  FLOW_OFFLOAD_DIR_REPLY);
	if (!flow_rule[1]) {
		__nf_flow_offload_destroy(flow_rule[0]);
		return -ENOMEM;
	}

	return 0;
}

static void nf_flow_offload_init(struct flow_cls_offload *cls_flow,
				 __be16 proto, int priority,
				 enum flow_cls_command cmd,
				 const struct flow_offload_tuple *tuple,
				 struct netlink_ext_ack *extack)
{
	cls_flow->common.protocol = proto;
	cls_flow->common.prio = priority;
	cls_flow->common.extack = extack;
	cls_flow->command = cmd;
	cls_flow->cookie = (unsigned long)tuple;
}

static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
				 struct flow_offload *flow,
				 struct nf_flow_rule *flow_rule,
				 enum flow_offload_tuple_dir dir,
				 int priority, int cmd,
				 struct flow_stats *stats,
				 struct list_head *block_cb_list)
{
	struct flow_cls_offload cls_flow = {};
	struct flow_block_cb *block_cb;
	struct netlink_ext_ack extack;
	__be16 proto = ETH_P_ALL;
	int err, i = 0;

	nf_flow_offload_init(&cls_flow, proto, priority, cmd,
			     &flow->tuplehash[dir].tuple, &extack);
	if (cmd == FLOW_CLS_REPLACE)
		cls_flow.rule = flow_rule->rule;

	down_read(&flowtable->flow_block_lock);
	list_for_each_entry(block_cb, block_cb_list, list) {
		err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow,
				   block_cb->cb_priv);
		if (err < 0)
			continue;

		i++;
	}
	up_read(&flowtable->flow_block_lock);

	if (cmd == FLOW_CLS_STATS)
		memcpy(stats, &cls_flow.stats, sizeof(*stats));

	return i;
}

static int flow_offload_tuple_add(struct flow_offload_work *offload,
				  struct nf_flow_rule *flow_rule,
				  enum flow_offload_tuple_dir dir)
{
	return nf_flow_offload_tuple(offload->flowtable, offload->flow,
				     flow_rule, dir, offload->priority,
				     FLOW_CLS_REPLACE, NULL,
				     &offload->flowtable->flow_block.cb_list);
}

static void flow_offload_tuple_del(struct flow_offload_work *offload,
				   enum flow_offload_tuple_dir dir)
{
	nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
			      offload->priority, FLOW_CLS_DESTROY, NULL,
			      &offload->flowtable->flow_block.cb_list);
}

static int flow_offload_rule_add(struct flow_offload_work *offload,
				 struct nf_flow_rule *flow_rule[])
{
	int ok_count = 0;

	ok_count += flow_offload_tuple_add(offload, flow_rule[0],
					   FLOW_OFFLOAD_DIR_ORIGINAL);
	ok_count += flow_offload_tuple_add(offload, flow_rule[1],
					   FLOW_OFFLOAD_DIR_REPLY);
	if (ok_count == 0)
		return -ENOENT;

	return 0;
}

static void flow_offload_work_add(struct flow_offload_work *offload)
{
	struct nf_flow_rule *flow_rule[FLOW_OFFLOAD_DIR_MAX];
	int err;

	err = nf_flow_offload_alloc(offload, flow_rule);
	if (err < 0)
		return;

	err = flow_offload_rule_add(offload, flow_rule);
	if (err < 0)
		set_bit(NF_FLOW_HW_REFRESH, &offload->flow->flags);
	else
		set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);

	nf_flow_offload_destroy(flow_rule);
}

static void flow_offload_work_del(struct flow_offload_work *offload)
{
	clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
	flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
	flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
	set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
}

static void flow_offload_tuple_stats(struct flow_offload_work *offload,
				     enum flow_offload_tuple_dir dir,
				     struct flow_stats *stats)
{
	nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
			      offload->priority, FLOW_CLS_STATS, stats,
			      &offload->flowtable->flow_block.cb_list);
}

static void flow_offload_work_stats(struct flow_offload_work *offload)
{
	struct flow_stats stats[FLOW_OFFLOAD_DIR_MAX] = {};
	u64 lastused;

	flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
	flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]);

	lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
	offload->flow->timeout = max_t(u64, offload->flow->timeout,
				       lastused + NF_FLOW_TIMEOUT);

	if (offload->flowtable->flags & NF_FLOWTABLE_COUNTER) {
		if (stats[0].pkts)
			nf_ct_acct_add(offload->flow->ct,
				       FLOW_OFFLOAD_DIR_ORIGINAL,
				       stats[0].pkts, stats[0].bytes);
		if (stats[1].pkts)
			nf_ct_acct_add(offload->flow->ct,
				       FLOW_OFFLOAD_DIR_REPLY,
				       stats[1].pkts, stats[1].bytes);
	}
}

static void flow_offload_work_handler(struct work_struct *work)
{
	struct flow_offload_work *offload;

	offload = container_of(work, struct flow_offload_work, work);
	switch (offload->cmd) {
	case FLOW_CLS_REPLACE:
		flow_offload_work_add(offload);
		break;
	case FLOW_CLS_DESTROY:
		flow_offload_work_del(offload);
		break;
	case FLOW_CLS_STATS:
		flow_offload_work_stats(offload);
		break;
	default:
		WARN_ON_ONCE(1);
	}

	clear_bit(NF_FLOW_HW_PENDING, &offload->flow->flags);
	kfree(offload);
}

static void flow_offload_queue_work(struct flow_offload_work *offload)
{
	queue_work(nf_flow_offload_wq, &offload->work);
}

static struct flow_offload_work *
nf_flow_offload_work_alloc(struct nf_flowtable *flowtable,
			   struct flow_offload *flow, unsigned int cmd)
{
	struct flow_offload_work *offload;

	if (test_and_set_bit(NF_FLOW_HW_PENDING, &flow->flags))
		return NULL;

	offload = kmalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
	if (!offload) {
		clear_bit(NF_FLOW_HW_PENDING, &flow->flags);
		return NULL;
	}

	offload->cmd = cmd;
	offload->flow = flow;
	offload->priority = flowtable->priority;
	offload->flowtable = flowtable;
	INIT_WORK(&offload->work, flow_offload_work_handler);

	return offload;
}

void nf_flow_offload_add(struct nf_flowtable *flowtable,
			 struct flow_offload *flow)
{
	struct flow_offload_work *offload;

	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_REPLACE);
	if (!offload)
		return;

	flow_offload_queue_work(offload);
}

void nf_flow_offload_del(struct nf_flowtable *flowtable,
			 struct flow_offload *flow)
{
	struct flow_offload_work *offload;

	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_DESTROY);
	if (!offload)
		return;

	set_bit(NF_FLOW_HW_DYING, &flow->flags);
	flow_offload_queue_work(offload);
}

void nf_flow_offload_stats(struct nf_flowtable *flowtable,
			   struct flow_offload *flow)
{
	struct flow_offload_work *offload;
	__s32 delta;

	delta = nf_flow_timeout_delta(flow->timeout);
	if ((delta >= (9 * NF_FLOW_TIMEOUT) / 10))
		return;

	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_STATS);
	if (!offload)
		return;

	flow_offload_queue_work(offload);
}

void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
{
	if (nf_flowtable_hw_offload(flowtable))
		flush_workqueue(nf_flow_offload_wq);
}

static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
				     struct flow_block_offload *bo,
				     enum flow_block_command cmd)
{
	struct flow_block_cb *block_cb, *next;
	int err = 0;

	switch (cmd) {
	case FLOW_BLOCK_BIND:
		list_splice(&bo->cb_list, &flowtable->flow_block.cb_list);
		break;
	case FLOW_BLOCK_UNBIND:
		list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
			list_del(&block_cb->list);
			flow_block_cb_free(block_cb);
		}
		break;
	default:
		WARN_ON_ONCE(1);
		err = -EOPNOTSUPP;
	}

	return err;
}

static void nf_flow_table_block_offload_init(struct flow_block_offload *bo,
					     struct net *net,
					     enum flow_block_command cmd,
					     struct nf_flowtable *flowtable,
					     struct netlink_ext_ack *extack)
{
	memset(bo, 0, sizeof(*bo));
	bo->net = net;
	bo->block = &flowtable->flow_block;
	bo->command = cmd;
	bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
	bo->extack = extack;
	INIT_LIST_HEAD(&bo->cb_list);
}

static void nf_flow_table_indr_cleanup(struct flow_block_cb *block_cb)
{
	struct nf_flowtable *flowtable = block_cb->indr.data;
	struct net_device *dev = block_cb->indr.dev;

	nf_flow_table_gc_cleanup(flowtable, dev);
	down_write(&flowtable->flow_block_lock);
	list_del(&block_cb->list);
	list_del(&block_cb->driver_list);
	flow_block_cb_free(block_cb);
	up_write(&flowtable->flow_block_lock);
}

static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo,
					  struct nf_flowtable *flowtable,
					  struct net_device *dev,
					  enum flow_block_command cmd,
					  struct netlink_ext_ack *extack)
{
	nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
					 extack);

	return flow_indr_dev_setup_offload(dev, NULL, TC_SETUP_FT, flowtable, bo,
					   nf_flow_table_indr_cleanup);
}

static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
				     struct nf_flowtable *flowtable,
				     struct net_device *dev,
				     enum flow_block_command cmd,
				     struct netlink_ext_ack *extack)
{
	int err;

	nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
					 extack);
	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo);
	if (err < 0)
		return err;

	return 0;
}

int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
				struct net_device *dev,
				enum flow_block_command cmd)
{
	struct netlink_ext_ack extack = {};
	struct flow_block_offload bo;
	int err;

	if (!nf_flowtable_hw_offload(flowtable))
		return 0;

	if (dev->netdev_ops->ndo_setup_tc)
		err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd,
						&extack);
	else
		err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd,
						     &extack);
	if (err < 0)
		return err;

	return nf_flow_table_block_setup(flowtable, &bo, cmd);
}
EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);

int nf_flow_table_offload_init(void)
{
	nf_flow_offload_wq = alloc_workqueue("nf_flow_table_offload",
					     WQ_UNBOUND, 0);
	if (!nf_flow_offload_wq)
		return -ENOMEM;

	return 0;
}

void nf_flow_table_offload_exit(void)
{
	destroy_workqueue(nf_flow_offload_wq);
}