1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* - 3 * net/sched/act_ct.c Connection Tracking action 4 * 5 * Authors: Paul Blakey <paulb@mellanox.com> 6 * Yossi Kuperman <yossiku@mellanox.com> 7 * Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> 8 */ 9 10 #include <linux/module.h> 11 #include <linux/init.h> 12 #include <linux/kernel.h> 13 #include <linux/skbuff.h> 14 #include <linux/rtnetlink.h> 15 #include <linux/pkt_cls.h> 16 #include <linux/ip.h> 17 #include <linux/ipv6.h> 18 #include <net/netlink.h> 19 #include <net/pkt_sched.h> 20 #include <net/pkt_cls.h> 21 #include <net/act_api.h> 22 #include <net/ip.h> 23 #include <net/ipv6_frag.h> 24 #include <uapi/linux/tc_act/tc_ct.h> 25 #include <net/tc_act/tc_ct.h> 26 27 #include <linux/netfilter/nf_nat.h> 28 #include <net/netfilter/nf_conntrack.h> 29 #include <net/netfilter/nf_conntrack_core.h> 30 #include <net/netfilter/nf_conntrack_zones.h> 31 #include <net/netfilter/nf_conntrack_helper.h> 32 #include <net/netfilter/ipv6/nf_defrag_ipv6.h> 33 34 static struct tc_action_ops act_ct_ops; 35 static unsigned int ct_net_id; 36 37 struct tc_ct_action_net { 38 struct tc_action_net tn; /* Must be first */ 39 bool labels; 40 }; 41 42 /* Determine whether skb->_nfct is equal to the result of conntrack lookup. */ 43 static bool tcf_ct_skb_nfct_cached(struct net *net, struct sk_buff *skb, 44 u16 zone_id, bool force) 45 { 46 enum ip_conntrack_info ctinfo; 47 struct nf_conn *ct; 48 49 ct = nf_ct_get(skb, &ctinfo); 50 if (!ct) 51 return false; 52 if (!net_eq(net, read_pnet(&ct->ct_net))) 53 return false; 54 if (nf_ct_zone(ct)->id != zone_id) 55 return false; 56 57 /* Force conntrack entry direction. */ 58 if (force && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) { 59 if (nf_ct_is_confirmed(ct)) 60 nf_ct_kill(ct); 61 62 nf_conntrack_put(&ct->ct_general); 63 nf_ct_set(skb, NULL, IP_CT_UNTRACKED); 64 65 return false; 66 } 67 68 return true; 69 } 70 71 /* Trim the skb to the length specified by the IP/IPv6 header, 72 * removing any trailing lower-layer padding. This prepares the skb 73 * for higher-layer processing that assumes skb->len excludes padding 74 * (such as nf_ip_checksum). The caller needs to pull the skb to the 75 * network header, and ensure ip_hdr/ipv6_hdr points to valid data. 76 */ 77 static int tcf_ct_skb_network_trim(struct sk_buff *skb, int family) 78 { 79 unsigned int len; 80 int err; 81 82 switch (family) { 83 case NFPROTO_IPV4: 84 len = ntohs(ip_hdr(skb)->tot_len); 85 break; 86 case NFPROTO_IPV6: 87 len = sizeof(struct ipv6hdr) 88 + ntohs(ipv6_hdr(skb)->payload_len); 89 break; 90 default: 91 len = skb->len; 92 } 93 94 err = pskb_trim_rcsum(skb, len); 95 96 return err; 97 } 98 99 static u8 tcf_ct_skb_nf_family(struct sk_buff *skb) 100 { 101 u8 family = NFPROTO_UNSPEC; 102 103 switch (skb->protocol) { 104 case htons(ETH_P_IP): 105 family = NFPROTO_IPV4; 106 break; 107 case htons(ETH_P_IPV6): 108 family = NFPROTO_IPV6; 109 break; 110 default: 111 break; 112 } 113 114 return family; 115 } 116 117 static int tcf_ct_ipv4_is_fragment(struct sk_buff *skb, bool *frag) 118 { 119 unsigned int len; 120 121 len = skb_network_offset(skb) + sizeof(struct iphdr); 122 if (unlikely(skb->len < len)) 123 return -EINVAL; 124 if (unlikely(!pskb_may_pull(skb, len))) 125 return -ENOMEM; 126 127 *frag = ip_is_fragment(ip_hdr(skb)); 128 return 0; 129 } 130 131 static int tcf_ct_ipv6_is_fragment(struct sk_buff *skb, bool *frag) 132 { 133 unsigned int flags = 0, len, payload_ofs = 0; 134 unsigned short frag_off; 135 int nexthdr; 136 137 len = skb_network_offset(skb) + sizeof(struct ipv6hdr); 138 if (unlikely(skb->len < len)) 139 return -EINVAL; 140 if (unlikely(!pskb_may_pull(skb, len))) 141 return -ENOMEM; 142 143 nexthdr = ipv6_find_hdr(skb, &payload_ofs, -1, &frag_off, &flags); 144 if (unlikely(nexthdr < 0)) 145 return -EPROTO; 146 147 *frag = flags & IP6_FH_F_FRAG; 148 return 0; 149 } 150 151 static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, 152 u8 family, u16 zone) 153 { 154 enum ip_conntrack_info ctinfo; 155 struct nf_conn *ct; 156 int err = 0; 157 bool frag; 158 159 /* Previously seen (loopback)? Ignore. */ 160 ct = nf_ct_get(skb, &ctinfo); 161 if ((ct && !nf_ct_is_template(ct)) || ctinfo == IP_CT_UNTRACKED) 162 return 0; 163 164 if (family == NFPROTO_IPV4) 165 err = tcf_ct_ipv4_is_fragment(skb, &frag); 166 else 167 err = tcf_ct_ipv6_is_fragment(skb, &frag); 168 if (err || !frag) 169 return err; 170 171 skb_get(skb); 172 173 if (family == NFPROTO_IPV4) { 174 enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone; 175 176 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); 177 local_bh_disable(); 178 err = ip_defrag(net, skb, user); 179 local_bh_enable(); 180 if (err && err != -EINPROGRESS) 181 goto out_free; 182 } else { /* NFPROTO_IPV6 */ 183 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) 184 enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; 185 186 memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); 187 err = nf_ct_frag6_gather(net, skb, user); 188 if (err && err != -EINPROGRESS) 189 goto out_free; 190 #else 191 err = -EOPNOTSUPP; 192 goto out_free; 193 #endif 194 } 195 196 skb_clear_hash(skb); 197 skb->ignore_df = 1; 198 return err; 199 200 out_free: 201 kfree_skb(skb); 202 return err; 203 } 204 205 static void tcf_ct_params_free(struct rcu_head *head) 206 { 207 struct tcf_ct_params *params = container_of(head, 208 struct tcf_ct_params, rcu); 209 210 if (params->tmpl) 211 nf_conntrack_put(¶ms->tmpl->ct_general); 212 kfree(params); 213 } 214 215 #if IS_ENABLED(CONFIG_NF_NAT) 216 /* Modelled after nf_nat_ipv[46]_fn(). 217 * range is only used for new, uninitialized NAT state. 218 * Returns either NF_ACCEPT or NF_DROP. 219 */ 220 static int ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, 221 enum ip_conntrack_info ctinfo, 222 const struct nf_nat_range2 *range, 223 enum nf_nat_manip_type maniptype) 224 { 225 int hooknum, err = NF_ACCEPT; 226 227 /* See HOOK2MANIP(). */ 228 if (maniptype == NF_NAT_MANIP_SRC) 229 hooknum = NF_INET_LOCAL_IN; /* Source NAT */ 230 else 231 hooknum = NF_INET_LOCAL_OUT; /* Destination NAT */ 232 233 switch (ctinfo) { 234 case IP_CT_RELATED: 235 case IP_CT_RELATED_REPLY: 236 if (skb->protocol == htons(ETH_P_IP) && 237 ip_hdr(skb)->protocol == IPPROTO_ICMP) { 238 if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, 239 hooknum)) 240 err = NF_DROP; 241 goto out; 242 } else if (IS_ENABLED(CONFIG_IPV6) && 243 skb->protocol == htons(ETH_P_IPV6)) { 244 __be16 frag_off; 245 u8 nexthdr = ipv6_hdr(skb)->nexthdr; 246 int hdrlen = ipv6_skip_exthdr(skb, 247 sizeof(struct ipv6hdr), 248 &nexthdr, &frag_off); 249 250 if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { 251 if (!nf_nat_icmpv6_reply_translation(skb, ct, 252 ctinfo, 253 hooknum, 254 hdrlen)) 255 err = NF_DROP; 256 goto out; 257 } 258 } 259 /* Non-ICMP, fall thru to initialize if needed. */ 260 /* fall through */ 261 case IP_CT_NEW: 262 /* Seen it before? This can happen for loopback, retrans, 263 * or local packets. 264 */ 265 if (!nf_nat_initialized(ct, maniptype)) { 266 /* Initialize according to the NAT action. */ 267 err = (range && range->flags & NF_NAT_RANGE_MAP_IPS) 268 /* Action is set up to establish a new 269 * mapping. 270 */ 271 ? nf_nat_setup_info(ct, range, maniptype) 272 : nf_nat_alloc_null_binding(ct, hooknum); 273 if (err != NF_ACCEPT) 274 goto out; 275 } 276 break; 277 278 case IP_CT_ESTABLISHED: 279 case IP_CT_ESTABLISHED_REPLY: 280 break; 281 282 default: 283 err = NF_DROP; 284 goto out; 285 } 286 287 err = nf_nat_packet(ct, ctinfo, hooknum, skb); 288 out: 289 return err; 290 } 291 #endif /* CONFIG_NF_NAT */ 292 293 static void tcf_ct_act_set_mark(struct nf_conn *ct, u32 mark, u32 mask) 294 { 295 #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) 296 u32 new_mark; 297 298 if (!mask) 299 return; 300 301 new_mark = mark | (ct->mark & ~(mask)); 302 if (ct->mark != new_mark) { 303 ct->mark = new_mark; 304 if (nf_ct_is_confirmed(ct)) 305 nf_conntrack_event_cache(IPCT_MARK, ct); 306 } 307 #endif 308 } 309 310 static void tcf_ct_act_set_labels(struct nf_conn *ct, 311 u32 *labels, 312 u32 *labels_m) 313 { 314 #if IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) 315 size_t labels_sz = FIELD_SIZEOF(struct tcf_ct_params, labels); 316 317 if (!memchr_inv(labels_m, 0, labels_sz)) 318 return; 319 320 nf_connlabels_replace(ct, labels, labels_m, 4); 321 #endif 322 } 323 324 static int tcf_ct_act_nat(struct sk_buff *skb, 325 struct nf_conn *ct, 326 enum ip_conntrack_info ctinfo, 327 int ct_action, 328 struct nf_nat_range2 *range, 329 bool commit) 330 { 331 #if IS_ENABLED(CONFIG_NF_NAT) 332 enum nf_nat_manip_type maniptype; 333 334 if (!(ct_action & TCA_CT_ACT_NAT)) 335 return NF_ACCEPT; 336 337 /* Add NAT extension if not confirmed yet. */ 338 if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct)) 339 return NF_DROP; /* Can't NAT. */ 340 341 if (ctinfo != IP_CT_NEW && (ct->status & IPS_NAT_MASK) && 342 (ctinfo != IP_CT_RELATED || commit)) { 343 /* NAT an established or related connection like before. */ 344 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) 345 /* This is the REPLY direction for a connection 346 * for which NAT was applied in the forward 347 * direction. Do the reverse NAT. 348 */ 349 maniptype = ct->status & IPS_SRC_NAT 350 ? NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC; 351 else 352 maniptype = ct->status & IPS_SRC_NAT 353 ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST; 354 } else if (ct_action & TCA_CT_ACT_NAT_SRC) { 355 maniptype = NF_NAT_MANIP_SRC; 356 } else if (ct_action & TCA_CT_ACT_NAT_DST) { 357 maniptype = NF_NAT_MANIP_DST; 358 } else { 359 return NF_ACCEPT; 360 } 361 362 return ct_nat_execute(skb, ct, ctinfo, range, maniptype); 363 #else 364 return NF_ACCEPT; 365 #endif 366 } 367 368 static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, 369 struct tcf_result *res) 370 { 371 struct net *net = dev_net(skb->dev); 372 bool cached, commit, clear, force; 373 enum ip_conntrack_info ctinfo; 374 struct tcf_ct *c = to_ct(a); 375 struct nf_conn *tmpl = NULL; 376 struct nf_hook_state state; 377 int nh_ofs, err, retval; 378 struct tcf_ct_params *p; 379 struct nf_conn *ct; 380 u8 family; 381 382 p = rcu_dereference_bh(c->params); 383 384 retval = READ_ONCE(c->tcf_action); 385 commit = p->ct_action & TCA_CT_ACT_COMMIT; 386 clear = p->ct_action & TCA_CT_ACT_CLEAR; 387 force = p->ct_action & TCA_CT_ACT_FORCE; 388 tmpl = p->tmpl; 389 390 if (clear) { 391 ct = nf_ct_get(skb, &ctinfo); 392 if (ct) { 393 nf_conntrack_put(&ct->ct_general); 394 nf_ct_set(skb, NULL, IP_CT_UNTRACKED); 395 } 396 397 goto out; 398 } 399 400 family = tcf_ct_skb_nf_family(skb); 401 if (family == NFPROTO_UNSPEC) 402 goto drop; 403 404 /* The conntrack module expects to be working at L3. 405 * We also try to pull the IPv4/6 header to linear area 406 */ 407 nh_ofs = skb_network_offset(skb); 408 skb_pull_rcsum(skb, nh_ofs); 409 err = tcf_ct_handle_fragments(net, skb, family, p->zone); 410 if (err == -EINPROGRESS) { 411 retval = TC_ACT_STOLEN; 412 goto out; 413 } 414 if (err) 415 goto drop; 416 417 err = tcf_ct_skb_network_trim(skb, family); 418 if (err) 419 goto drop; 420 421 /* If we are recirculating packets to match on ct fields and 422 * committing with a separate ct action, then we don't need to 423 * actually run the packet through conntrack twice unless it's for a 424 * different zone. 425 */ 426 cached = tcf_ct_skb_nfct_cached(net, skb, p->zone, force); 427 if (!cached) { 428 /* Associate skb with specified zone. */ 429 if (tmpl) { 430 ct = nf_ct_get(skb, &ctinfo); 431 if (skb_nfct(skb)) 432 nf_conntrack_put(skb_nfct(skb)); 433 nf_conntrack_get(&tmpl->ct_general); 434 nf_ct_set(skb, tmpl, IP_CT_NEW); 435 } 436 437 state.hook = NF_INET_PRE_ROUTING; 438 state.net = net; 439 state.pf = family; 440 err = nf_conntrack_in(skb, &state); 441 if (err != NF_ACCEPT) 442 goto out_push; 443 } 444 445 ct = nf_ct_get(skb, &ctinfo); 446 if (!ct) 447 goto out_push; 448 nf_ct_deliver_cached_events(ct); 449 450 err = tcf_ct_act_nat(skb, ct, ctinfo, p->ct_action, &p->range, commit); 451 if (err != NF_ACCEPT) 452 goto drop; 453 454 if (commit) { 455 tcf_ct_act_set_mark(ct, p->mark, p->mark_mask); 456 tcf_ct_act_set_labels(ct, p->labels, p->labels_mask); 457 458 /* This will take care of sending queued events 459 * even if the connection is already confirmed. 460 */ 461 nf_conntrack_confirm(skb); 462 } 463 464 out_push: 465 skb_push_rcsum(skb, nh_ofs); 466 467 out: 468 bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), skb); 469 return retval; 470 471 drop: 472 qstats_drop_inc(this_cpu_ptr(a->cpu_qstats)); 473 return TC_ACT_SHOT; 474 } 475 476 static const struct nla_policy ct_policy[TCA_CT_MAX + 1] = { 477 [TCA_CT_UNSPEC] = { .strict_start_type = TCA_CT_UNSPEC + 1 }, 478 [TCA_CT_ACTION] = { .type = NLA_U16 }, 479 [TCA_CT_PARMS] = { .type = NLA_EXACT_LEN, .len = sizeof(struct tc_ct) }, 480 [TCA_CT_ZONE] = { .type = NLA_U16 }, 481 [TCA_CT_MARK] = { .type = NLA_U32 }, 482 [TCA_CT_MARK_MASK] = { .type = NLA_U32 }, 483 [TCA_CT_LABELS] = { .type = NLA_BINARY, 484 .len = 128 / BITS_PER_BYTE }, 485 [TCA_CT_LABELS_MASK] = { .type = NLA_BINARY, 486 .len = 128 / BITS_PER_BYTE }, 487 [TCA_CT_NAT_IPV4_MIN] = { .type = NLA_U32 }, 488 [TCA_CT_NAT_IPV4_MAX] = { .type = NLA_U32 }, 489 [TCA_CT_NAT_IPV6_MIN] = { .type = NLA_EXACT_LEN, 490 .len = sizeof(struct in6_addr) }, 491 [TCA_CT_NAT_IPV6_MAX] = { .type = NLA_EXACT_LEN, 492 .len = sizeof(struct in6_addr) }, 493 [TCA_CT_NAT_PORT_MIN] = { .type = NLA_U16 }, 494 [TCA_CT_NAT_PORT_MAX] = { .type = NLA_U16 }, 495 }; 496 497 static int tcf_ct_fill_params_nat(struct tcf_ct_params *p, 498 struct tc_ct *parm, 499 struct nlattr **tb, 500 struct netlink_ext_ack *extack) 501 { 502 struct nf_nat_range2 *range; 503 504 if (!(p->ct_action & TCA_CT_ACT_NAT)) 505 return 0; 506 507 if (!IS_ENABLED(CONFIG_NF_NAT)) { 508 NL_SET_ERR_MSG_MOD(extack, "Netfilter nat isn't enabled in kernel"); 509 return -EOPNOTSUPP; 510 } 511 512 if (!(p->ct_action & (TCA_CT_ACT_NAT_SRC | TCA_CT_ACT_NAT_DST))) 513 return 0; 514 515 if ((p->ct_action & TCA_CT_ACT_NAT_SRC) && 516 (p->ct_action & TCA_CT_ACT_NAT_DST)) { 517 NL_SET_ERR_MSG_MOD(extack, "dnat and snat can't be enabled at the same time"); 518 return -EOPNOTSUPP; 519 } 520 521 range = &p->range; 522 if (tb[TCA_CT_NAT_IPV4_MIN]) { 523 struct nlattr *max_attr = tb[TCA_CT_NAT_IPV4_MAX]; 524 525 p->ipv4_range = true; 526 range->flags |= NF_NAT_RANGE_MAP_IPS; 527 range->min_addr.ip = 528 nla_get_in_addr(tb[TCA_CT_NAT_IPV4_MIN]); 529 530 range->max_addr.ip = max_attr ? 531 nla_get_in_addr(max_attr) : 532 range->min_addr.ip; 533 } else if (tb[TCA_CT_NAT_IPV6_MIN]) { 534 struct nlattr *max_attr = tb[TCA_CT_NAT_IPV6_MAX]; 535 536 p->ipv4_range = false; 537 range->flags |= NF_NAT_RANGE_MAP_IPS; 538 range->min_addr.in6 = 539 nla_get_in6_addr(tb[TCA_CT_NAT_IPV6_MIN]); 540 541 range->max_addr.in6 = max_attr ? 542 nla_get_in6_addr(max_attr) : 543 range->min_addr.in6; 544 } 545 546 if (tb[TCA_CT_NAT_PORT_MIN]) { 547 range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; 548 range->min_proto.all = nla_get_be16(tb[TCA_CT_NAT_PORT_MIN]); 549 550 range->max_proto.all = tb[TCA_CT_NAT_PORT_MAX] ? 551 nla_get_be16(tb[TCA_CT_NAT_PORT_MAX]) : 552 range->min_proto.all; 553 } 554 555 return 0; 556 } 557 558 static void tcf_ct_set_key_val(struct nlattr **tb, 559 void *val, int val_type, 560 void *mask, int mask_type, 561 int len) 562 { 563 if (!tb[val_type]) 564 return; 565 nla_memcpy(val, tb[val_type], len); 566 567 if (!mask) 568 return; 569 570 if (mask_type == TCA_CT_UNSPEC || !tb[mask_type]) 571 memset(mask, 0xff, len); 572 else 573 nla_memcpy(mask, tb[mask_type], len); 574 } 575 576 static int tcf_ct_fill_params(struct net *net, 577 struct tcf_ct_params *p, 578 struct tc_ct *parm, 579 struct nlattr **tb, 580 struct netlink_ext_ack *extack) 581 { 582 struct tc_ct_action_net *tn = net_generic(net, ct_net_id); 583 struct nf_conntrack_zone zone; 584 struct nf_conn *tmpl; 585 int err; 586 587 p->zone = NF_CT_DEFAULT_ZONE_ID; 588 589 tcf_ct_set_key_val(tb, 590 &p->ct_action, TCA_CT_ACTION, 591 NULL, TCA_CT_UNSPEC, 592 sizeof(p->ct_action)); 593 594 if (p->ct_action & TCA_CT_ACT_CLEAR) 595 return 0; 596 597 err = tcf_ct_fill_params_nat(p, parm, tb, extack); 598 if (err) 599 return err; 600 601 if (tb[TCA_CT_MARK]) { 602 if (!IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)) { 603 NL_SET_ERR_MSG_MOD(extack, "Conntrack mark isn't enabled."); 604 return -EOPNOTSUPP; 605 } 606 tcf_ct_set_key_val(tb, 607 &p->mark, TCA_CT_MARK, 608 &p->mark_mask, TCA_CT_MARK_MASK, 609 sizeof(p->mark)); 610 } 611 612 if (tb[TCA_CT_LABELS]) { 613 if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)) { 614 NL_SET_ERR_MSG_MOD(extack, "Conntrack labels isn't enabled."); 615 return -EOPNOTSUPP; 616 } 617 618 if (!tn->labels) { 619 NL_SET_ERR_MSG_MOD(extack, "Failed to set connlabel length"); 620 return -EOPNOTSUPP; 621 } 622 tcf_ct_set_key_val(tb, 623 p->labels, TCA_CT_LABELS, 624 p->labels_mask, TCA_CT_LABELS_MASK, 625 sizeof(p->labels)); 626 } 627 628 if (tb[TCA_CT_ZONE]) { 629 if (!IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES)) { 630 NL_SET_ERR_MSG_MOD(extack, "Conntrack zones isn't enabled."); 631 return -EOPNOTSUPP; 632 } 633 634 tcf_ct_set_key_val(tb, 635 &p->zone, TCA_CT_ZONE, 636 NULL, TCA_CT_UNSPEC, 637 sizeof(p->zone)); 638 } 639 640 if (p->zone == NF_CT_DEFAULT_ZONE_ID) 641 return 0; 642 643 nf_ct_zone_init(&zone, p->zone, NF_CT_DEFAULT_ZONE_DIR, 0); 644 tmpl = nf_ct_tmpl_alloc(net, &zone, GFP_KERNEL); 645 if (!tmpl) { 646 NL_SET_ERR_MSG_MOD(extack, "Failed to allocate conntrack template"); 647 return -ENOMEM; 648 } 649 __set_bit(IPS_CONFIRMED_BIT, &tmpl->status); 650 nf_conntrack_get(&tmpl->ct_general); 651 p->tmpl = tmpl; 652 653 return 0; 654 } 655 656 static int tcf_ct_init(struct net *net, struct nlattr *nla, 657 struct nlattr *est, struct tc_action **a, 658 int replace, int bind, bool rtnl_held, 659 struct tcf_proto *tp, 660 struct netlink_ext_ack *extack) 661 { 662 struct tc_action_net *tn = net_generic(net, ct_net_id); 663 struct tcf_ct_params *params = NULL; 664 struct nlattr *tb[TCA_CT_MAX + 1]; 665 struct tcf_chain *goto_ch = NULL; 666 struct tc_ct *parm; 667 struct tcf_ct *c; 668 int err, res = 0; 669 670 if (!nla) { 671 NL_SET_ERR_MSG_MOD(extack, "Ct requires attributes to be passed"); 672 return -EINVAL; 673 } 674 675 err = nla_parse_nested(tb, TCA_CT_MAX, nla, ct_policy, extack); 676 if (err < 0) 677 return err; 678 679 if (!tb[TCA_CT_PARMS]) { 680 NL_SET_ERR_MSG_MOD(extack, "Missing required ct parameters"); 681 return -EINVAL; 682 } 683 parm = nla_data(tb[TCA_CT_PARMS]); 684 685 err = tcf_idr_check_alloc(tn, &parm->index, a, bind); 686 if (err < 0) 687 return err; 688 689 if (!err) { 690 err = tcf_idr_create(tn, parm->index, est, a, 691 &act_ct_ops, bind, true); 692 if (err) { 693 tcf_idr_cleanup(tn, parm->index); 694 return err; 695 } 696 res = ACT_P_CREATED; 697 } else { 698 if (bind) 699 return 0; 700 701 if (!replace) { 702 tcf_idr_release(*a, bind); 703 return -EEXIST; 704 } 705 } 706 err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); 707 if (err < 0) 708 goto cleanup; 709 710 c = to_ct(*a); 711 712 params = kzalloc(sizeof(*params), GFP_KERNEL); 713 if (unlikely(!params)) { 714 err = -ENOMEM; 715 goto cleanup; 716 } 717 718 err = tcf_ct_fill_params(net, params, parm, tb, extack); 719 if (err) 720 goto cleanup; 721 722 spin_lock_bh(&c->tcf_lock); 723 goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); 724 rcu_swap_protected(c->params, params, lockdep_is_held(&c->tcf_lock)); 725 spin_unlock_bh(&c->tcf_lock); 726 727 if (goto_ch) 728 tcf_chain_put_by_act(goto_ch); 729 if (params) 730 kfree_rcu(params, rcu); 731 if (res == ACT_P_CREATED) 732 tcf_idr_insert(tn, *a); 733 734 return res; 735 736 cleanup: 737 if (goto_ch) 738 tcf_chain_put_by_act(goto_ch); 739 kfree(params); 740 tcf_idr_release(*a, bind); 741 return err; 742 } 743 744 static void tcf_ct_cleanup(struct tc_action *a) 745 { 746 struct tcf_ct_params *params; 747 struct tcf_ct *c = to_ct(a); 748 749 params = rcu_dereference_protected(c->params, 1); 750 if (params) 751 call_rcu(¶ms->rcu, tcf_ct_params_free); 752 } 753 754 static int tcf_ct_dump_key_val(struct sk_buff *skb, 755 void *val, int val_type, 756 void *mask, int mask_type, 757 int len) 758 { 759 int err; 760 761 if (mask && !memchr_inv(mask, 0, len)) 762 return 0; 763 764 err = nla_put(skb, val_type, len, val); 765 if (err) 766 return err; 767 768 if (mask_type != TCA_CT_UNSPEC) { 769 err = nla_put(skb, mask_type, len, mask); 770 if (err) 771 return err; 772 } 773 774 return 0; 775 } 776 777 static int tcf_ct_dump_nat(struct sk_buff *skb, struct tcf_ct_params *p) 778 { 779 struct nf_nat_range2 *range = &p->range; 780 781 if (!(p->ct_action & TCA_CT_ACT_NAT)) 782 return 0; 783 784 if (!(p->ct_action & (TCA_CT_ACT_NAT_SRC | TCA_CT_ACT_NAT_DST))) 785 return 0; 786 787 if (range->flags & NF_NAT_RANGE_MAP_IPS) { 788 if (p->ipv4_range) { 789 if (nla_put_in_addr(skb, TCA_CT_NAT_IPV4_MIN, 790 range->min_addr.ip)) 791 return -1; 792 if (nla_put_in_addr(skb, TCA_CT_NAT_IPV4_MAX, 793 range->max_addr.ip)) 794 return -1; 795 } else { 796 if (nla_put_in6_addr(skb, TCA_CT_NAT_IPV6_MIN, 797 &range->min_addr.in6)) 798 return -1; 799 if (nla_put_in6_addr(skb, TCA_CT_NAT_IPV6_MAX, 800 &range->max_addr.in6)) 801 return -1; 802 } 803 } 804 805 if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) { 806 if (nla_put_be16(skb, TCA_CT_NAT_PORT_MIN, 807 range->min_proto.all)) 808 return -1; 809 if (nla_put_be16(skb, TCA_CT_NAT_PORT_MAX, 810 range->max_proto.all)) 811 return -1; 812 } 813 814 return 0; 815 } 816 817 static inline int tcf_ct_dump(struct sk_buff *skb, struct tc_action *a, 818 int bind, int ref) 819 { 820 unsigned char *b = skb_tail_pointer(skb); 821 struct tcf_ct *c = to_ct(a); 822 struct tcf_ct_params *p; 823 824 struct tc_ct opt = { 825 .index = c->tcf_index, 826 .refcnt = refcount_read(&c->tcf_refcnt) - ref, 827 .bindcnt = atomic_read(&c->tcf_bindcnt) - bind, 828 }; 829 struct tcf_t t; 830 831 spin_lock_bh(&c->tcf_lock); 832 p = rcu_dereference_protected(c->params, 833 lockdep_is_held(&c->tcf_lock)); 834 opt.action = c->tcf_action; 835 836 if (tcf_ct_dump_key_val(skb, 837 &p->ct_action, TCA_CT_ACTION, 838 NULL, TCA_CT_UNSPEC, 839 sizeof(p->ct_action))) 840 goto nla_put_failure; 841 842 if (p->ct_action & TCA_CT_ACT_CLEAR) 843 goto skip_dump; 844 845 if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && 846 tcf_ct_dump_key_val(skb, 847 &p->mark, TCA_CT_MARK, 848 &p->mark_mask, TCA_CT_MARK_MASK, 849 sizeof(p->mark))) 850 goto nla_put_failure; 851 852 if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && 853 tcf_ct_dump_key_val(skb, 854 p->labels, TCA_CT_LABELS, 855 p->labels_mask, TCA_CT_LABELS_MASK, 856 sizeof(p->labels))) 857 goto nla_put_failure; 858 859 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && 860 tcf_ct_dump_key_val(skb, 861 &p->zone, TCA_CT_ZONE, 862 NULL, TCA_CT_UNSPEC, 863 sizeof(p->zone))) 864 goto nla_put_failure; 865 866 if (tcf_ct_dump_nat(skb, p)) 867 goto nla_put_failure; 868 869 skip_dump: 870 if (nla_put(skb, TCA_CT_PARMS, sizeof(opt), &opt)) 871 goto nla_put_failure; 872 873 tcf_tm_dump(&t, &c->tcf_tm); 874 if (nla_put_64bit(skb, TCA_CT_TM, sizeof(t), &t, TCA_CT_PAD)) 875 goto nla_put_failure; 876 spin_unlock_bh(&c->tcf_lock); 877 878 return skb->len; 879 nla_put_failure: 880 spin_unlock_bh(&c->tcf_lock); 881 nlmsg_trim(skb, b); 882 return -1; 883 } 884 885 static int tcf_ct_walker(struct net *net, struct sk_buff *skb, 886 struct netlink_callback *cb, int type, 887 const struct tc_action_ops *ops, 888 struct netlink_ext_ack *extack) 889 { 890 struct tc_action_net *tn = net_generic(net, ct_net_id); 891 892 return tcf_generic_walker(tn, skb, cb, type, ops, extack); 893 } 894 895 static int tcf_ct_search(struct net *net, struct tc_action **a, u32 index) 896 { 897 struct tc_action_net *tn = net_generic(net, ct_net_id); 898 899 return tcf_idr_search(tn, a, index); 900 } 901 902 static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets, 903 u64 lastuse, bool hw) 904 { 905 struct tcf_ct *c = to_ct(a); 906 907 _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); 908 909 if (hw) 910 _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw), 911 bytes, packets); 912 c->tcf_tm.lastuse = max_t(u64, c->tcf_tm.lastuse, lastuse); 913 } 914 915 static struct tc_action_ops act_ct_ops = { 916 .kind = "ct", 917 .id = TCA_ID_CT, 918 .owner = THIS_MODULE, 919 .act = tcf_ct_act, 920 .dump = tcf_ct_dump, 921 .init = tcf_ct_init, 922 .cleanup = tcf_ct_cleanup, 923 .walk = tcf_ct_walker, 924 .lookup = tcf_ct_search, 925 .stats_update = tcf_stats_update, 926 .size = sizeof(struct tcf_ct), 927 }; 928 929 static __net_init int ct_init_net(struct net *net) 930 { 931 unsigned int n_bits = FIELD_SIZEOF(struct tcf_ct_params, labels) * 8; 932 struct tc_ct_action_net *tn = net_generic(net, ct_net_id); 933 934 if (nf_connlabels_get(net, n_bits - 1)) { 935 tn->labels = false; 936 pr_err("act_ct: Failed to set connlabels length"); 937 } else { 938 tn->labels = true; 939 } 940 941 return tc_action_net_init(&tn->tn, &act_ct_ops); 942 } 943 944 static void __net_exit ct_exit_net(struct list_head *net_list) 945 { 946 struct net *net; 947 948 rtnl_lock(); 949 list_for_each_entry(net, net_list, exit_list) { 950 struct tc_ct_action_net *tn = net_generic(net, ct_net_id); 951 952 if (tn->labels) 953 nf_connlabels_put(net); 954 } 955 rtnl_unlock(); 956 957 tc_action_net_exit(net_list, ct_net_id); 958 } 959 960 static struct pernet_operations ct_net_ops = { 961 .init = ct_init_net, 962 .exit_batch = ct_exit_net, 963 .id = &ct_net_id, 964 .size = sizeof(struct tc_ct_action_net), 965 }; 966 967 static int __init ct_init_module(void) 968 { 969 return tcf_register_action(&act_ct_ops, &ct_net_ops); 970 } 971 972 static void __exit ct_cleanup_module(void) 973 { 974 tcf_unregister_action(&act_ct_ops, &ct_net_ops); 975 } 976 977 module_init(ct_init_module); 978 module_exit(ct_cleanup_module); 979 MODULE_AUTHOR("Paul Blakey <paulb@mellanox.com>"); 980 MODULE_AUTHOR("Yossi Kuperman <yossiku@mellanox.com>"); 981 MODULE_AUTHOR("Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>"); 982 MODULE_DESCRIPTION("Connection tracking action"); 983 MODULE_LICENSE("GPL v2"); 984 985