#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
#include <linux/tc_act/tc_csum.h>
#include <net/flow_offload.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_tuple.h>

static struct workqueue_struct *nf_flow_offload_add_wq;
static struct workqueue_struct *nf_flow_offload_del_wq;
static struct workqueue_struct *nf_flow_offload_stats_wq;

struct flow_offload_work {
	struct list_head list;
	enum flow_cls_command cmd;
	int priority;
	struct nf_flowtable *flowtable;
	struct flow_offload *flow;
	struct work_struct work;
};

#define NF_FLOW_DISSECTOR(__match, __type, __field)	\
	(__match)->dissector.offset[__type] =		\
		offsetof(struct nf_flow_key, __field)

static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
				   struct ip_tunnel_info *tun_info)
{
	struct nf_flow_key *mask = &match->mask;
	struct nf_flow_key *key = &match->key;
	unsigned int enc_keys;

	if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX))
		return;

	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
	key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id);
	mask->enc_key_id.keyid = 0xffffffff;
	enc_keys = BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
		   BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL);

	if (ip_tunnel_info_af(tun_info) == AF_INET) {
		NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
				  enc_ipv4);
		key->enc_ipv4.src = tun_info->key.u.ipv4.dst;
		key->enc_ipv4.dst = tun_info->key.u.ipv4.src;
		if (key->enc_ipv4.src)
			mask->enc_ipv4.src = 0xffffffff;
		if (key->enc_ipv4.dst)
			mask->enc_ipv4.dst = 0xffffffff;
		enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
	} else {
		memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst,
		       sizeof(struct in6_addr));
		memcpy(&key->enc_ipv6.dst, &tun_info->key.u.ipv6.src,
		       sizeof(struct in6_addr));
		/* As in the IPv4 branch, set an all-ones *mask* for each
		 * address present in the key; memsetting the key itself
		 * would destroy the tunnel addresses just copied in.
		 */
		if (memcmp(&key->enc_ipv6.src, &in6addr_any,
			   sizeof(struct in6_addr)))
			memset(&mask->enc_ipv6.src, 0xff,
			       sizeof(struct in6_addr));
		if (memcmp(&key->enc_ipv6.dst, &in6addr_any,
			   sizeof(struct in6_addr)))
			memset(&mask->enc_ipv6.dst, 0xff,
			       sizeof(struct in6_addr));
		enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
	}

	match->dissector.used_keys |= enc_keys;
}

static void nf_flow_rule_vlan_match(struct flow_dissector_key_vlan *key,
				    struct flow_dissector_key_vlan *mask,
				    u16 vlan_id, __be16 proto)
{
	key->vlan_id = vlan_id;
	mask->vlan_id = VLAN_VID_MASK;
	key->vlan_tpid = proto;
	mask->vlan_tpid = 0xffff;
}
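
/*
 * Build the flower-style match (key/mask pair and dissector layout) for one
 * direction of the flow: ingress ifindex, up to two VLAN tags, L3 addresses,
 * L4 protocol and ports. Returns -EOPNOTSUPP for address families or
 * transport protocols that cannot be offloaded.
 */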
static int nf_flow_rule_match(struct nf_flow_match *match,
			      const struct flow_offload_tuple *tuple,
			      struct dst_entry *other_dst)
{
	struct nf_flow_key *mask = &match->mask;
	struct nf_flow_key *key = &match->key;
	struct ip_tunnel_info *tun_info;
	bool vlan_encap = false;

	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);

	if (other_dst && other_dst->lwtstate) {
		tun_info = lwt_tun_info(other_dst->lwtstate);
		nf_flow_rule_lwt_match(match, tun_info);
	}

	key->meta.ingress_ifindex = tuple->iifidx;
	mask->meta.ingress_ifindex = 0xffffffff;

	if (tuple->encap_num > 0 && !(tuple->in_vlan_ingress & BIT(0)) &&
	    tuple->encap[0].proto == htons(ETH_P_8021Q)) {
		NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN, vlan);
		nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
					tuple->encap[0].id,
					tuple->encap[0].proto);
		vlan_encap = true;
	}

	if (tuple->encap_num > 1 && !(tuple->in_vlan_ingress & BIT(1)) &&
	    tuple->encap[1].proto == htons(ETH_P_8021Q)) {
		if (vlan_encap) {
			NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CVLAN,
					  cvlan);
			nf_flow_rule_vlan_match(&key->cvlan, &mask->cvlan,
						tuple->encap[1].id,
						tuple->encap[1].proto);
		} else {
			NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN,
					  vlan);
			nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
						tuple->encap[1].id,
						tuple->encap[1].proto);
		}
	}

	switch (tuple->l3proto) {
	case AF_INET:
		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
		key->basic.n_proto = htons(ETH_P_IP);
		key->ipv4.src = tuple->src_v4.s_addr;
		mask->ipv4.src = 0xffffffff;
		key->ipv4.dst = tuple->dst_v4.s_addr;
		mask->ipv4.dst = 0xffffffff;
		break;
	case AF_INET6:
		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
		key->basic.n_proto = htons(ETH_P_IPV6);
		key->ipv6.src = tuple->src_v6;
		memset(&mask->ipv6.src, 0xff, sizeof(mask->ipv6.src));
		key->ipv6.dst = tuple->dst_v6;
		memset(&mask->ipv6.dst, 0xff, sizeof(mask->ipv6.dst));
		break;
	default:
		return -EOPNOTSUPP;
	}
	mask->control.addr_type = 0xffff;
	match->dissector.used_keys |= BIT(key->control.addr_type);
	mask->basic.n_proto = 0xffff;

	switch (tuple->l4proto) {
	case IPPROTO_TCP:
		key->tcp.flags = 0;
		mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16);
		match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
		break;
	case IPPROTO_UDP:
		break;
	default:
		return -EOPNOTSUPP;
	}

	key->basic.ip_proto = tuple->l4proto;
	mask->basic.ip_proto = 0xff;

	key->tp.src = tuple->src_port;
	mask->tp.src = 0xffff;
	key->tp.dst = tuple->dst_port;
	mask->tp.dst = 0xffff;

	match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
				      BIT(FLOW_DISSECTOR_KEY_CONTROL) |
				      BIT(FLOW_DISSECTOR_KEY_BASIC) |
				      BIT(FLOW_DISSECTOR_KEY_PORTS);
	return 0;
}

static void flow_offload_mangle(struct flow_action_entry *entry,
				enum flow_action_mangle_base htype, u32 offset,
				const __be32 *value, const __be32 *mask)
{
	entry->id = FLOW_ACTION_MANGLE;
	entry->mangle.htype = htype;
	entry->mangle.offset = offset;
	memcpy(&entry->mangle.mask, mask, sizeof(u32));
	memcpy(&entry->mangle.val, value, sizeof(u32));
}

static inline struct flow_action_entry *
flow_action_entry_next(struct nf_flow_rule *flow_rule)
{
	int i = flow_rule->rule->action.num_entries++;

	return &flow_rule->rule->action.entries[i];
}
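
/*
 * Ethernet header rewrite. Mangle actions operate on 32-bit words, so each
 * 6-byte MAC address is split across two actions whose masks preserve the
 * neighbouring half-words.
 */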
static int flow_offload_eth_src(struct net *net,
				const struct flow_offload *flow,
				enum flow_offload_tuple_dir dir,
				struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
	struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
	const struct flow_offload_tuple *other_tuple, *this_tuple;
	struct net_device *dev = NULL;
	const unsigned char *addr;
	u32 mask, val;
	u16 val16;

	this_tuple = &flow->tuplehash[dir].tuple;

	switch (this_tuple->xmit_type) {
	case FLOW_OFFLOAD_XMIT_DIRECT:
		addr = this_tuple->out.h_source;
		break;
	case FLOW_OFFLOAD_XMIT_NEIGH:
		other_tuple = &flow->tuplehash[!dir].tuple;
		dev = dev_get_by_index(net, other_tuple->iifidx);
		if (!dev)
			return -ENOENT;

		addr = dev->dev_addr;
		break;
	default:
		return -EOPNOTSUPP;
	}

	mask = ~0xffff0000;
	memcpy(&val16, addr, 2);
	val = val16 << 16;
	flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
			    &val, &mask);

	mask = ~0xffffffff;
	memcpy(&val, addr + 2, 4);
	flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
			    &val, &mask);

	if (dev)
		dev_put(dev);

	return 0;
}

static int flow_offload_eth_dst(struct net *net,
				const struct flow_offload *flow,
				enum flow_offload_tuple_dir dir,
				struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
	struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
	const struct flow_offload_tuple *other_tuple, *this_tuple;
	const struct dst_entry *dst_cache;
	unsigned char ha[ETH_ALEN];
	struct neighbour *n;
	const void *daddr;
	u32 mask, val;
	u8 nud_state;
	u16 val16;

	this_tuple = &flow->tuplehash[dir].tuple;

	switch (this_tuple->xmit_type) {
	case FLOW_OFFLOAD_XMIT_DIRECT:
		ether_addr_copy(ha, this_tuple->out.h_dest);
		break;
	case FLOW_OFFLOAD_XMIT_NEIGH:
		other_tuple = &flow->tuplehash[!dir].tuple;
		daddr = &other_tuple->src_v4;
		dst_cache = this_tuple->dst_cache;
		n = dst_neigh_lookup(dst_cache, daddr);
		if (!n)
			return -ENOENT;

		read_lock_bh(&n->lock);
		nud_state = n->nud_state;
		ether_addr_copy(ha, n->ha);
		read_unlock_bh(&n->lock);
		neigh_release(n);

		if (!(nud_state & NUD_VALID))
			return -ENOENT;
		break;
	default:
		return -EOPNOTSUPP;
	}

	mask = ~0xffffffff;
	memcpy(&val, ha, 4);
	flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0,
			    &val, &mask);

	mask = ~0x0000ffff;
	memcpy(&val16, ha + 4, 2);
	val = val16;
	flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
			    &val, &mask);

	return 0;
}
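
/*
 * NAT address rewrite: the replacement address comes from the opposite
 * direction's tuple. A translation that rewrites the source address in the
 * original direction shows up as a destination rewrite on the reply path,
 * hence the mirrored iphdr offsets.
 */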
static void flow_offload_ipv4_snat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask = ~htonl(0xffffffff);
	__be32 addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
		offset = offsetof(struct iphdr, saddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
		offset = offsetof(struct iphdr, daddr);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
			    &addr, &mask);
}

static void flow_offload_ipv4_dnat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask = ~htonl(0xffffffff);
	__be32 addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
		offset = offsetof(struct iphdr, daddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
		offset = offsetof(struct iphdr, saddr);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
			    &addr, &mask);
}

static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
				     unsigned int offset,
				     const __be32 *addr, const __be32 *mask)
{
	struct flow_action_entry *entry;
	int i, j;

	/* Rewrite the 128-bit address one 32-bit mangle action at a time;
	 * the loop bound must cover all sixteen bytes, not just the first
	 * word.
	 */
	for (i = 0, j = 0; i < sizeof(struct in6_addr); i += sizeof(u32), j++) {
		entry = flow_action_entry_next(flow_rule);
		flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
				    offset + i, &addr[j], mask);
	}
}

static void flow_offload_ipv6_snat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	u32 mask = ~htonl(0xffffffff);
	const __be32 *addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, saddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, daddr);
		break;
	default:
		return;
	}

	flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
}

static void flow_offload_ipv6_dnat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	u32 mask = ~htonl(0xffffffff);
	const __be32 *addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, daddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, saddr);
		break;
	default:
		return;
	}

	flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
}

static int flow_offload_l4proto(const struct flow_offload *flow)
{
	u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
	u8 type = 0;

	switch (protonum) {
	case IPPROTO_TCP:
		type = FLOW_ACT_MANGLE_HDR_TYPE_TCP;
		break;
	case IPPROTO_UDP:
		type = FLOW_ACT_MANGLE_HDR_TYPE_UDP;
		break;
	default:
		break;
	}

	return type;
}
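
/*
 * Port rewrite: ports are 16-bit fields, but mangle actions write 32-bit
 * words starting at the transport header, so the new port is shifted into
 * the right half-word and the mask keeps the other port intact.
 */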
static void flow_offload_port_snat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask, port;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port);
		offset = 0; /* offsetof(struct tcphdr, source); */
		port = htonl(port << 16);
		mask = ~htonl(0xffff0000);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port);
		offset = 0; /* offsetof(struct tcphdr, dest); */
		port = htonl(port);
		mask = ~htonl(0xffff);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
			    &port, &mask);
}

static void flow_offload_port_dnat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask, port;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port);
		offset = 0; /* offsetof(struct tcphdr, dest); */
		port = htonl(port);
		mask = ~htonl(0xffff);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port);
		offset = 0; /* offsetof(struct tcphdr, source); */
		port = htonl(port << 16);
		mask = ~htonl(0xffff0000);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
			    &port, &mask);
}

static void flow_offload_ipv4_checksum(struct net *net,
				       const struct flow_offload *flow,
				       struct nf_flow_rule *flow_rule)
{
	u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);

	entry->id = FLOW_ACTION_CSUM;
	entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR;

	switch (protonum) {
	case IPPROTO_TCP:
		entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_TCP;
		break;
	case IPPROTO_UDP:
		entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
		break;
	}
}

static void flow_offload_redirect(struct net *net,
				  const struct flow_offload *flow,
				  enum flow_offload_tuple_dir dir,
				  struct nf_flow_rule *flow_rule)
{
	const struct flow_offload_tuple *this_tuple, *other_tuple;
	struct flow_action_entry *entry;
	struct net_device *dev;
	int ifindex;

	this_tuple = &flow->tuplehash[dir].tuple;
	switch (this_tuple->xmit_type) {
	case FLOW_OFFLOAD_XMIT_DIRECT:
		ifindex = this_tuple->out.hw_ifidx;
		break;
	case FLOW_OFFLOAD_XMIT_NEIGH:
		other_tuple = &flow->tuplehash[!dir].tuple;
		ifindex = other_tuple->iifidx;
		break;
	default:
		return;
	}

	dev = dev_get_by_index(net, ifindex);
	if (!dev)
		return;

	entry = flow_action_entry_next(flow_rule);
	entry->id = FLOW_ACTION_REDIRECT;
	entry->dev = dev;
}
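
/*
 * Tunnel actions: if this direction's route carries transmit tunnel
 * metadata, the packet must be encapsulated on egress; if the reverse
 * direction's route does, the packet arrived encapsulated and must be
 * decapsulated first.
 */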
static void flow_offload_encap_tunnel(const struct flow_offload *flow,
				      enum flow_offload_tuple_dir dir,
				      struct nf_flow_rule *flow_rule)
{
	const struct flow_offload_tuple *this_tuple;
	struct flow_action_entry *entry;
	struct dst_entry *dst;

	this_tuple = &flow->tuplehash[dir].tuple;
	if (this_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
		return;

	dst = this_tuple->dst_cache;
	if (dst && dst->lwtstate) {
		struct ip_tunnel_info *tun_info;

		tun_info = lwt_tun_info(dst->lwtstate);
		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
			entry = flow_action_entry_next(flow_rule);
			entry->id = FLOW_ACTION_TUNNEL_ENCAP;
			entry->tunnel = tun_info;
		}
	}
}

static void flow_offload_decap_tunnel(const struct flow_offload *flow,
				      enum flow_offload_tuple_dir dir,
				      struct nf_flow_rule *flow_rule)
{
	const struct flow_offload_tuple *other_tuple;
	struct flow_action_entry *entry;
	struct dst_entry *dst;

	other_tuple = &flow->tuplehash[!dir].tuple;
	if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
		return;

	dst = other_tuple->dst_cache;
	if (dst && dst->lwtstate) {
		struct ip_tunnel_info *tun_info;

		tun_info = lwt_tun_info(dst->lwtstate);
		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
			entry = flow_action_entry_next(flow_rule);
			entry->id = FLOW_ACTION_TUNNEL_DECAP;
		}
	}
}

static int
nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
			  enum flow_offload_tuple_dir dir,
			  struct nf_flow_rule *flow_rule)
{
	const struct flow_offload_tuple *other_tuple;
	const struct flow_offload_tuple *tuple;
	int i;

	flow_offload_decap_tunnel(flow, dir, flow_rule);
	flow_offload_encap_tunnel(flow, dir, flow_rule);

	if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
	    flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
		return -1;

	tuple = &flow->tuplehash[dir].tuple;

	for (i = 0; i < tuple->encap_num; i++) {
		struct flow_action_entry *entry;

		if (tuple->in_vlan_ingress & BIT(i))
			continue;

		if (tuple->encap[i].proto == htons(ETH_P_8021Q)) {
			entry = flow_action_entry_next(flow_rule);
			entry->id = FLOW_ACTION_VLAN_POP;
		}
	}

	other_tuple = &flow->tuplehash[!dir].tuple;

	for (i = 0; i < other_tuple->encap_num; i++) {
		struct flow_action_entry *entry;

		if (other_tuple->in_vlan_ingress & BIT(i))
			continue;

		entry = flow_action_entry_next(flow_rule);

		switch (other_tuple->encap[i].proto) {
		case htons(ETH_P_PPP_SES):
			entry->id = FLOW_ACTION_PPPOE_PUSH;
			entry->pppoe.sid = other_tuple->encap[i].id;
			break;
		case htons(ETH_P_8021Q):
			entry->id = FLOW_ACTION_VLAN_PUSH;
			entry->vlan.vid = other_tuple->encap[i].id;
			entry->vlan.proto = other_tuple->encap[i].proto;
			break;
		}
	}

	return 0;
}

int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
			    enum flow_offload_tuple_dir dir,
			    struct nf_flow_rule *flow_rule)
{
	if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
		return -1;

	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
		flow_offload_ipv4_snat(net, flow, dir, flow_rule);
		flow_offload_port_snat(net, flow, dir, flow_rule);
	}
	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
		flow_offload_ipv4_dnat(net, flow, dir, flow_rule);
		flow_offload_port_dnat(net, flow, dir, flow_rule);
	}
	if (test_bit(NF_FLOW_SNAT, &flow->flags) ||
	    test_bit(NF_FLOW_DNAT, &flow->flags))
		flow_offload_ipv4_checksum(net, flow, flow_rule);

	flow_offload_redirect(net, flow, dir, flow_rule);

	return 0;
}
EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);
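
/*
 * Same as nf_flow_rule_route_ipv4, minus the checksum action: IPv6 has no
 * header checksum to update after the address mangles.
 */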
int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
			    enum flow_offload_tuple_dir dir,
			    struct nf_flow_rule *flow_rule)
{
	if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
		return -1;

	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
		flow_offload_ipv6_snat(net, flow, dir, flow_rule);
		flow_offload_port_snat(net, flow, dir, flow_rule);
	}
	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
		flow_offload_ipv6_dnat(net, flow, dir, flow_rule);
		flow_offload_port_dnat(net, flow, dir, flow_rule);
	}

	flow_offload_redirect(net, flow, dir, flow_rule);

	return 0;
}
EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6);

#define NF_FLOW_RULE_ACTION_MAX	16

static struct nf_flow_rule *
nf_flow_offload_rule_alloc(struct net *net,
			   const struct flow_offload_work *offload,
			   enum flow_offload_tuple_dir dir)
{
	const struct nf_flowtable *flowtable = offload->flowtable;
	const struct flow_offload_tuple *tuple, *other_tuple;
	const struct flow_offload *flow = offload->flow;
	struct dst_entry *other_dst = NULL;
	struct nf_flow_rule *flow_rule;
	int err = -ENOMEM;

	flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
	if (!flow_rule)
		goto err_flow;

	flow_rule->rule = flow_rule_alloc(NF_FLOW_RULE_ACTION_MAX);
	if (!flow_rule->rule)
		goto err_flow_rule;

	flow_rule->rule->match.dissector = &flow_rule->match.dissector;
	flow_rule->rule->match.mask = &flow_rule->match.mask;
	flow_rule->rule->match.key = &flow_rule->match.key;

	tuple = &flow->tuplehash[dir].tuple;
	other_tuple = &flow->tuplehash[!dir].tuple;
	if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
		other_dst = other_tuple->dst_cache;

	err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst);
	if (err < 0)
		goto err_flow_match;

	flow_rule->rule->action.num_entries = 0;
	if (flowtable->type->action(net, flow, dir, flow_rule) < 0)
		goto err_flow_match;

	return flow_rule;

err_flow_match:
	kfree(flow_rule->rule);
err_flow_rule:
	kfree(flow_rule);
err_flow:
	return NULL;
}

static void __nf_flow_offload_destroy(struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry;
	int i;

	for (i = 0; i < flow_rule->rule->action.num_entries; i++) {
		entry = &flow_rule->rule->action.entries[i];
		if (entry->id != FLOW_ACTION_REDIRECT)
			continue;

		dev_put(entry->dev);
	}
	kfree(flow_rule->rule);
	kfree(flow_rule);
}

static void nf_flow_offload_destroy(struct nf_flow_rule *flow_rule[])
{
	int i;

	for (i = 0; i < FLOW_OFFLOAD_DIR_MAX; i++)
		__nf_flow_offload_destroy(flow_rule[i]);
}

static int nf_flow_offload_alloc(const struct flow_offload_work *offload,
				 struct nf_flow_rule *flow_rule[])
{
	struct net *net = read_pnet(&offload->flowtable->net);

	flow_rule[0] = nf_flow_offload_rule_alloc(net, offload,
						  FLOW_OFFLOAD_DIR_ORIGINAL);
	if (!flow_rule[0])
		return -ENOMEM;

	flow_rule[1] = nf_flow_offload_rule_alloc(net, offload,
						  FLOW_OFFLOAD_DIR_REPLY);
	if (!flow_rule[1]) {
		__nf_flow_offload_destroy(flow_rule[0]);
		return -ENOMEM;
	}

	return 0;
}

static void nf_flow_offload_init(struct flow_cls_offload *cls_flow,
				 __be16 proto, int priority,
				 enum flow_cls_command cmd,
				 const struct flow_offload_tuple *tuple,
				 struct netlink_ext_ack *extack)
{
	cls_flow->common.protocol = proto;
	cls_flow->common.prio = priority;
	cls_flow->common.extack = extack;
	cls_flow->command = cmd;
	cls_flow->cookie = (unsigned long)tuple;
}
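
/*
 * Run one FLOW_CLS_* command for a single direction through every callback
 * registered on this flowtable's block. Individual callback errors are
 * skipped; the return value is the number of callbacks that accepted the
 * command, which lets the caller detect that no device installed the rule.
 */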
static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
				 struct flow_offload *flow,
				 struct nf_flow_rule *flow_rule,
				 enum flow_offload_tuple_dir dir,
				 int priority, int cmd,
				 struct flow_stats *stats,
				 struct list_head *block_cb_list)
{
	struct flow_cls_offload cls_flow = {};
	struct flow_block_cb *block_cb;
	struct netlink_ext_ack extack;
	__be16 proto = ETH_P_ALL;
	int err, i = 0;

	nf_flow_offload_init(&cls_flow, proto, priority, cmd,
			     &flow->tuplehash[dir].tuple, &extack);
	if (cmd == FLOW_CLS_REPLACE)
		cls_flow.rule = flow_rule->rule;

	down_read(&flowtable->flow_block_lock);
	list_for_each_entry(block_cb, block_cb_list, list) {
		err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow,
				   block_cb->cb_priv);
		if (err < 0)
			continue;

		i++;
	}
	up_read(&flowtable->flow_block_lock);

	if (cmd == FLOW_CLS_STATS)
		memcpy(stats, &cls_flow.stats, sizeof(*stats));

	return i;
}

static int flow_offload_tuple_add(struct flow_offload_work *offload,
				  struct nf_flow_rule *flow_rule,
				  enum flow_offload_tuple_dir dir)
{
	return nf_flow_offload_tuple(offload->flowtable, offload->flow,
				     flow_rule, dir, offload->priority,
				     FLOW_CLS_REPLACE, NULL,
				     &offload->flowtable->flow_block.cb_list);
}

static void flow_offload_tuple_del(struct flow_offload_work *offload,
				   enum flow_offload_tuple_dir dir)
{
	nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
			      offload->priority, FLOW_CLS_DESTROY, NULL,
			      &offload->flowtable->flow_block.cb_list);
}

static int flow_offload_rule_add(struct flow_offload_work *offload,
				 struct nf_flow_rule *flow_rule[])
{
	int ok_count = 0;

	ok_count += flow_offload_tuple_add(offload, flow_rule[0],
					   FLOW_OFFLOAD_DIR_ORIGINAL);
	ok_count += flow_offload_tuple_add(offload, flow_rule[1],
					   FLOW_OFFLOAD_DIR_REPLY);
	if (ok_count == 0)
		return -ENOENT;

	return 0;
}

static void flow_offload_work_add(struct flow_offload_work *offload)
{
	struct nf_flow_rule *flow_rule[FLOW_OFFLOAD_DIR_MAX];
	int err;

	err = nf_flow_offload_alloc(offload, flow_rule);
	if (err < 0)
		return;

	err = flow_offload_rule_add(offload, flow_rule);
	if (err < 0)
		goto out;

	set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);

out:
	nf_flow_offload_destroy(flow_rule);
}

static void flow_offload_work_del(struct flow_offload_work *offload)
{
	clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
	flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
	flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
	set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
}

static void flow_offload_tuple_stats(struct flow_offload_work *offload,
				     enum flow_offload_tuple_dir dir,
				     struct flow_stats *stats)
{
	nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
			      offload->priority, FLOW_CLS_STATS, stats,
			      &offload->flowtable->flow_block.cb_list);
}
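
/*
 * Pull per-direction counters from the hardware, push the flow timeout
 * forward based on the most recent use, and fold the packet/byte deltas
 * into conntrack accounting when the flowtable has counters enabled.
 */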
static void flow_offload_work_stats(struct flow_offload_work *offload)
{
	struct flow_stats stats[FLOW_OFFLOAD_DIR_MAX] = {};
	u64 lastused;

	flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
	flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]);

	lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
	offload->flow->timeout = max_t(u64, offload->flow->timeout,
				       lastused + flow_offload_get_timeout(offload->flow));

	if (offload->flowtable->flags & NF_FLOWTABLE_COUNTER) {
		if (stats[0].pkts)
			nf_ct_acct_add(offload->flow->ct,
				       FLOW_OFFLOAD_DIR_ORIGINAL,
				       stats[0].pkts, stats[0].bytes);
		if (stats[1].pkts)
			nf_ct_acct_add(offload->flow->ct,
				       FLOW_OFFLOAD_DIR_REPLY,
				       stats[1].pkts, stats[1].bytes);
	}
}

static void flow_offload_work_handler(struct work_struct *work)
{
	struct flow_offload_work *offload;

	offload = container_of(work, struct flow_offload_work, work);
	switch (offload->cmd) {
	case FLOW_CLS_REPLACE:
		flow_offload_work_add(offload);
		break;
	case FLOW_CLS_DESTROY:
		flow_offload_work_del(offload);
		break;
	case FLOW_CLS_STATS:
		flow_offload_work_stats(offload);
		break;
	default:
		WARN_ON_ONCE(1);
	}

	clear_bit(NF_FLOW_HW_PENDING, &offload->flow->flags);
	kfree(offload);
}

static void flow_offload_queue_work(struct flow_offload_work *offload)
{
	if (offload->cmd == FLOW_CLS_REPLACE)
		queue_work(nf_flow_offload_add_wq, &offload->work);
	else if (offload->cmd == FLOW_CLS_DESTROY)
		queue_work(nf_flow_offload_del_wq, &offload->work);
	else
		queue_work(nf_flow_offload_stats_wq, &offload->work);
}

static struct flow_offload_work *
nf_flow_offload_work_alloc(struct nf_flowtable *flowtable,
			   struct flow_offload *flow, unsigned int cmd)
{
	struct flow_offload_work *offload;

	if (test_and_set_bit(NF_FLOW_HW_PENDING, &flow->flags))
		return NULL;

	offload = kmalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
	if (!offload) {
		clear_bit(NF_FLOW_HW_PENDING, &flow->flags);
		return NULL;
	}

	offload->cmd = cmd;
	offload->flow = flow;
	offload->priority = flowtable->priority;
	offload->flowtable = flowtable;
	INIT_WORK(&offload->work, flow_offload_work_handler);

	return offload;
}

void nf_flow_offload_add(struct nf_flowtable *flowtable,
			 struct flow_offload *flow)
{
	struct flow_offload_work *offload;

	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_REPLACE);
	if (!offload)
		return;

	flow_offload_queue_work(offload);
}

void nf_flow_offload_del(struct nf_flowtable *flowtable,
			 struct flow_offload *flow)
{
	struct flow_offload_work *offload;

	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_DESTROY);
	if (!offload)
		return;

	set_bit(NF_FLOW_HW_DYING, &flow->flags);
	flow_offload_queue_work(offload);
}

void nf_flow_offload_stats(struct nf_flowtable *flowtable,
			   struct flow_offload *flow)
{
	struct flow_offload_work *offload;
	__s32 delta;

	delta = nf_flow_timeout_delta(flow->timeout);
	if (delta >= (9 * flow_offload_get_timeout(flow)) / 10)
		return;

	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_STATS);
	if (!offload)
		return;

	flow_offload_queue_work(offload);
}

void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
{
	if (nf_flowtable_hw_offload(flowtable)) {
		flush_workqueue(nf_flow_offload_add_wq);
		flush_workqueue(nf_flow_offload_del_wq);
		flush_workqueue(nf_flow_offload_stats_wq);
	}
}
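
/*
 * Bind/unbind the driver callbacks collected in the flow_block_offload:
 * on BIND they are spliced onto the flowtable's callback list, on UNBIND
 * they are unlinked and freed.
 */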
static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
				     struct flow_block_offload *bo,
				     enum flow_block_command cmd)
{
	struct flow_block_cb *block_cb, *next;
	int err = 0;

	switch (cmd) {
	case FLOW_BLOCK_BIND:
		list_splice(&bo->cb_list, &flowtable->flow_block.cb_list);
		break;
	case FLOW_BLOCK_UNBIND:
		list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
			list_del(&block_cb->list);
			flow_block_cb_free(block_cb);
		}
		break;
	default:
		WARN_ON_ONCE(1);
		err = -EOPNOTSUPP;
	}

	return err;
}

static void nf_flow_table_block_offload_init(struct flow_block_offload *bo,
					     struct net *net,
					     enum flow_block_command cmd,
					     struct nf_flowtable *flowtable,
					     struct netlink_ext_ack *extack)
{
	memset(bo, 0, sizeof(*bo));
	bo->net = net;
	bo->block = &flowtable->flow_block;
	bo->command = cmd;
	bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
	bo->extack = extack;
	INIT_LIST_HEAD(&bo->cb_list);
}

static void nf_flow_table_indr_cleanup(struct flow_block_cb *block_cb)
{
	struct nf_flowtable *flowtable = block_cb->indr.data;
	struct net_device *dev = block_cb->indr.dev;

	nf_flow_table_gc_cleanup(flowtable, dev);
	down_write(&flowtable->flow_block_lock);
	list_del(&block_cb->list);
	list_del(&block_cb->driver_list);
	flow_block_cb_free(block_cb);
	up_write(&flowtable->flow_block_lock);
}

static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo,
					  struct nf_flowtable *flowtable,
					  struct net_device *dev,
					  enum flow_block_command cmd,
					  struct netlink_ext_ack *extack)
{
	nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
					 extack);

	return flow_indr_dev_setup_offload(dev, NULL, TC_SETUP_FT, flowtable, bo,
					   nf_flow_table_indr_cleanup);
}

static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
				     struct nf_flowtable *flowtable,
				     struct net_device *dev,
				     enum flow_block_command cmd,
				     struct netlink_ext_ack *extack)
{
	int err;

	nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
					 extack);
	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo);
	if (err < 0)
		return err;

	return 0;
}

int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
				struct net_device *dev,
				enum flow_block_command cmd)
{
	struct netlink_ext_ack extack = {};
	struct flow_block_offload bo;
	int err;

	if (!nf_flowtable_hw_offload(flowtable))
		return 0;

	if (dev->netdev_ops->ndo_setup_tc)
		err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd,
						&extack);
	else
		err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd,
						     &extack);
	if (err < 0)
		return err;

	return nf_flow_table_block_setup(flowtable, &bo, cmd);
}
EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);
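
/*
 * Separate unbound workqueues for add, delete and stats requests keep the
 * three command types from queueing behind one another.
 */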
int nf_flow_table_offload_init(void)
{
	nf_flow_offload_add_wq = alloc_workqueue("nf_ft_offload_add",
						 WQ_UNBOUND | WQ_SYSFS, 0);
	if (!nf_flow_offload_add_wq)
		return -ENOMEM;

	nf_flow_offload_del_wq = alloc_workqueue("nf_ft_offload_del",
						 WQ_UNBOUND | WQ_SYSFS, 0);
	if (!nf_flow_offload_del_wq)
		goto err_del_wq;

	nf_flow_offload_stats_wq = alloc_workqueue("nf_ft_offload_stats",
						   WQ_UNBOUND | WQ_SYSFS, 0);
	if (!nf_flow_offload_stats_wq)
		goto err_stats_wq;

	return 0;

err_stats_wq:
	destroy_workqueue(nf_flow_offload_del_wq);
err_del_wq:
	destroy_workqueue(nf_flow_offload_add_wq);
	return -ENOMEM;
}

void nf_flow_table_offload_exit(void)
{
	destroy_workqueue(nf_flow_offload_add_wq);
	destroy_workqueue(nf_flow_offload_del_wq);
	destroy_workqueue(nf_flow_offload_stats_wq);
}