1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* - 3 * net/sched/act_ct.c Connection Tracking action 4 * 5 * Authors: Paul Blakey <paulb@mellanox.com> 6 * Yossi Kuperman <yossiku@mellanox.com> 7 * Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> 8 */ 9 10 #include <linux/module.h> 11 #include <linux/init.h> 12 #include <linux/kernel.h> 13 #include <linux/skbuff.h> 14 #include <linux/rtnetlink.h> 15 #include <linux/pkt_cls.h> 16 #include <linux/ip.h> 17 #include <linux/ipv6.h> 18 #include <net/netlink.h> 19 #include <net/pkt_sched.h> 20 #include <net/pkt_cls.h> 21 #include <net/act_api.h> 22 #include <net/ip.h> 23 #include <net/ipv6_frag.h> 24 #include <uapi/linux/tc_act/tc_ct.h> 25 #include <net/tc_act/tc_ct.h> 26 27 #include <net/netfilter/nf_conntrack.h> 28 #include <net/netfilter/nf_conntrack_core.h> 29 #include <net/netfilter/nf_conntrack_zones.h> 30 #include <net/netfilter/nf_conntrack_helper.h> 31 #include <net/netfilter/ipv6/nf_defrag_ipv6.h> 32 #include <uapi/linux/netfilter/nf_nat.h> 33 34 static struct tc_action_ops act_ct_ops; 35 static unsigned int ct_net_id; 36 37 struct tc_ct_action_net { 38 struct tc_action_net tn; /* Must be first */ 39 bool labels; 40 }; 41 42 /* Determine whether skb->_nfct is equal to the result of conntrack lookup. */ 43 static bool tcf_ct_skb_nfct_cached(struct net *net, struct sk_buff *skb, 44 u16 zone_id, bool force) 45 { 46 enum ip_conntrack_info ctinfo; 47 struct nf_conn *ct; 48 49 ct = nf_ct_get(skb, &ctinfo); 50 if (!ct) 51 return false; 52 if (!net_eq(net, read_pnet(&ct->ct_net))) 53 return false; 54 if (nf_ct_zone(ct)->id != zone_id) 55 return false; 56 57 /* Force conntrack entry direction. */ 58 if (force && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) { 59 if (nf_ct_is_confirmed(ct)) 60 nf_ct_kill(ct); 61 62 nf_conntrack_put(&ct->ct_general); 63 nf_ct_set(skb, NULL, IP_CT_UNTRACKED); 64 65 return false; 66 } 67 68 return true; 69 } 70 71 /* Trim the skb to the length specified by the IP/IPv6 header, 72 * removing any trailing lower-layer padding. This prepares the skb 73 * for higher-layer processing that assumes skb->len excludes padding 74 * (such as nf_ip_checksum). The caller needs to pull the skb to the 75 * network header, and ensure ip_hdr/ipv6_hdr points to valid data. 76 */ 77 static int tcf_ct_skb_network_trim(struct sk_buff *skb, int family) 78 { 79 unsigned int len; 80 int err; 81 82 switch (family) { 83 case NFPROTO_IPV4: 84 len = ntohs(ip_hdr(skb)->tot_len); 85 break; 86 case NFPROTO_IPV6: 87 len = sizeof(struct ipv6hdr) 88 + ntohs(ipv6_hdr(skb)->payload_len); 89 break; 90 default: 91 len = skb->len; 92 } 93 94 err = pskb_trim_rcsum(skb, len); 95 96 return err; 97 } 98 99 static u8 tcf_ct_skb_nf_family(struct sk_buff *skb) 100 { 101 u8 family = NFPROTO_UNSPEC; 102 103 switch (skb->protocol) { 104 case htons(ETH_P_IP): 105 family = NFPROTO_IPV4; 106 break; 107 case htons(ETH_P_IPV6): 108 family = NFPROTO_IPV6; 109 break; 110 default: 111 break; 112 } 113 114 return family; 115 } 116 117 static int tcf_ct_ipv4_is_fragment(struct sk_buff *skb, bool *frag) 118 { 119 unsigned int len; 120 121 len = skb_network_offset(skb) + sizeof(struct iphdr); 122 if (unlikely(skb->len < len)) 123 return -EINVAL; 124 if (unlikely(!pskb_may_pull(skb, len))) 125 return -ENOMEM; 126 127 *frag = ip_is_fragment(ip_hdr(skb)); 128 return 0; 129 } 130 131 static int tcf_ct_ipv6_is_fragment(struct sk_buff *skb, bool *frag) 132 { 133 unsigned int flags = 0, len, payload_ofs = 0; 134 unsigned short frag_off; 135 int nexthdr; 136 137 len = skb_network_offset(skb) + sizeof(struct ipv6hdr); 138 if (unlikely(skb->len < len)) 139 return -EINVAL; 140 if (unlikely(!pskb_may_pull(skb, len))) 141 return -ENOMEM; 142 143 nexthdr = ipv6_find_hdr(skb, &payload_ofs, -1, &frag_off, &flags); 144 if (unlikely(nexthdr < 0)) 145 return -EPROTO; 146 147 *frag = flags & IP6_FH_F_FRAG; 148 return 0; 149 } 150 151 static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, 152 u8 family, u16 zone) 153 { 154 enum ip_conntrack_info ctinfo; 155 struct nf_conn *ct; 156 int err = 0; 157 bool frag; 158 159 /* Previously seen (loopback)? Ignore. */ 160 ct = nf_ct_get(skb, &ctinfo); 161 if ((ct && !nf_ct_is_template(ct)) || ctinfo == IP_CT_UNTRACKED) 162 return 0; 163 164 if (family == NFPROTO_IPV4) 165 err = tcf_ct_ipv4_is_fragment(skb, &frag); 166 else 167 err = tcf_ct_ipv6_is_fragment(skb, &frag); 168 if (err || !frag) 169 return err; 170 171 skb_get(skb); 172 173 if (family == NFPROTO_IPV4) { 174 enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone; 175 176 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); 177 local_bh_disable(); 178 err = ip_defrag(net, skb, user); 179 local_bh_enable(); 180 if (err && err != -EINPROGRESS) 181 goto out_free; 182 } else { /* NFPROTO_IPV6 */ 183 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) 184 enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; 185 186 memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); 187 err = nf_ct_frag6_gather(net, skb, user); 188 if (err && err != -EINPROGRESS) 189 goto out_free; 190 #else 191 err = -EOPNOTSUPP; 192 goto out_free; 193 #endif 194 } 195 196 skb_clear_hash(skb); 197 skb->ignore_df = 1; 198 return err; 199 200 out_free: 201 kfree_skb(skb); 202 return err; 203 } 204 205 static void tcf_ct_params_free(struct rcu_head *head) 206 { 207 struct tcf_ct_params *params = container_of(head, 208 struct tcf_ct_params, rcu); 209 210 if (params->tmpl) 211 nf_conntrack_put(¶ms->tmpl->ct_general); 212 kfree(params); 213 } 214 215 #if IS_ENABLED(CONFIG_NF_NAT) 216 /* Modelled after nf_nat_ipv[46]_fn(). 217 * range is only used for new, uninitialized NAT state. 218 * Returns either NF_ACCEPT or NF_DROP. 219 */ 220 static int ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, 221 enum ip_conntrack_info ctinfo, 222 const struct nf_nat_range2 *range, 223 enum nf_nat_manip_type maniptype) 224 { 225 int hooknum, err = NF_ACCEPT; 226 227 /* See HOOK2MANIP(). */ 228 if (maniptype == NF_NAT_MANIP_SRC) 229 hooknum = NF_INET_LOCAL_IN; /* Source NAT */ 230 else 231 hooknum = NF_INET_LOCAL_OUT; /* Destination NAT */ 232 233 switch (ctinfo) { 234 case IP_CT_RELATED: 235 case IP_CT_RELATED_REPLY: 236 if (skb->protocol == htons(ETH_P_IP) && 237 ip_hdr(skb)->protocol == IPPROTO_ICMP) { 238 if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, 239 hooknum)) 240 err = NF_DROP; 241 goto out; 242 } else if (IS_ENABLED(CONFIG_IPV6) && 243 skb->protocol == htons(ETH_P_IPV6)) { 244 __be16 frag_off; 245 u8 nexthdr = ipv6_hdr(skb)->nexthdr; 246 int hdrlen = ipv6_skip_exthdr(skb, 247 sizeof(struct ipv6hdr), 248 &nexthdr, &frag_off); 249 250 if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { 251 if (!nf_nat_icmpv6_reply_translation(skb, ct, 252 ctinfo, 253 hooknum, 254 hdrlen)) 255 err = NF_DROP; 256 goto out; 257 } 258 } 259 /* Non-ICMP, fall thru to initialize if needed. */ 260 /* fall through */ 261 case IP_CT_NEW: 262 /* Seen it before? This can happen for loopback, retrans, 263 * or local packets. 264 */ 265 if (!nf_nat_initialized(ct, maniptype)) { 266 /* Initialize according to the NAT action. */ 267 err = (range && range->flags & NF_NAT_RANGE_MAP_IPS) 268 /* Action is set up to establish a new 269 * mapping. 270 */ 271 ? nf_nat_setup_info(ct, range, maniptype) 272 : nf_nat_alloc_null_binding(ct, hooknum); 273 if (err != NF_ACCEPT) 274 goto out; 275 } 276 break; 277 278 case IP_CT_ESTABLISHED: 279 case IP_CT_ESTABLISHED_REPLY: 280 break; 281 282 default: 283 err = NF_DROP; 284 goto out; 285 } 286 287 err = nf_nat_packet(ct, ctinfo, hooknum, skb); 288 out: 289 return err; 290 } 291 #endif /* CONFIG_NF_NAT */ 292 293 static void tcf_ct_act_set_mark(struct nf_conn *ct, u32 mark, u32 mask) 294 { 295 #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) 296 u32 new_mark; 297 298 if (!mask) 299 return; 300 301 new_mark = mark | (ct->mark & ~(mask)); 302 if (ct->mark != new_mark) { 303 ct->mark = new_mark; 304 if (nf_ct_is_confirmed(ct)) 305 nf_conntrack_event_cache(IPCT_MARK, ct); 306 } 307 #endif 308 } 309 310 static void tcf_ct_act_set_labels(struct nf_conn *ct, 311 u32 *labels, 312 u32 *labels_m) 313 { 314 #if IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) 315 size_t labels_sz = FIELD_SIZEOF(struct tcf_ct_params, labels); 316 317 if (!memchr_inv(labels_m, 0, labels_sz)) 318 return; 319 320 nf_connlabels_replace(ct, labels, labels_m, 4); 321 #endif 322 } 323 324 static int tcf_ct_act_nat(struct sk_buff *skb, 325 struct nf_conn *ct, 326 enum ip_conntrack_info ctinfo, 327 int ct_action, 328 struct nf_nat_range2 *range, 329 bool commit) 330 { 331 #if IS_ENABLED(CONFIG_NF_NAT) 332 enum nf_nat_manip_type maniptype; 333 334 if (!(ct_action & TCA_CT_ACT_NAT)) 335 return NF_ACCEPT; 336 337 /* Add NAT extension if not confirmed yet. */ 338 if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct)) 339 return NF_DROP; /* Can't NAT. */ 340 341 if (ctinfo != IP_CT_NEW && (ct->status & IPS_NAT_MASK) && 342 (ctinfo != IP_CT_RELATED || commit)) { 343 /* NAT an established or related connection like before. */ 344 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) 345 /* This is the REPLY direction for a connection 346 * for which NAT was applied in the forward 347 * direction. Do the reverse NAT. 348 */ 349 maniptype = ct->status & IPS_SRC_NAT 350 ? NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC; 351 else 352 maniptype = ct->status & IPS_SRC_NAT 353 ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST; 354 } else if (ct_action & TCA_CT_ACT_NAT_SRC) { 355 maniptype = NF_NAT_MANIP_SRC; 356 } else if (ct_action & TCA_CT_ACT_NAT_DST) { 357 maniptype = NF_NAT_MANIP_DST; 358 } else { 359 return NF_ACCEPT; 360 } 361 362 return ct_nat_execute(skb, ct, ctinfo, range, maniptype); 363 #else 364 return NF_ACCEPT; 365 #endif 366 } 367 368 static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, 369 struct tcf_result *res) 370 { 371 struct net *net = dev_net(skb->dev); 372 bool cached, commit, clear, force; 373 enum ip_conntrack_info ctinfo; 374 struct tcf_ct *c = to_ct(a); 375 struct nf_conn *tmpl = NULL; 376 struct nf_hook_state state; 377 int nh_ofs, err, retval; 378 struct tcf_ct_params *p; 379 struct nf_conn *ct; 380 u8 family; 381 382 p = rcu_dereference_bh(c->params); 383 384 retval = READ_ONCE(c->tcf_action); 385 commit = p->ct_action & TCA_CT_ACT_COMMIT; 386 clear = p->ct_action & TCA_CT_ACT_CLEAR; 387 force = p->ct_action & TCA_CT_ACT_FORCE; 388 tmpl = p->tmpl; 389 390 if (clear) { 391 ct = nf_ct_get(skb, &ctinfo); 392 if (ct) { 393 nf_conntrack_put(&ct->ct_general); 394 nf_ct_set(skb, NULL, IP_CT_UNTRACKED); 395 } 396 397 goto out; 398 } 399 400 family = tcf_ct_skb_nf_family(skb); 401 if (family == NFPROTO_UNSPEC) 402 goto drop; 403 404 /* The conntrack module expects to be working at L3. 405 * We also try to pull the IPv4/6 header to linear area 406 */ 407 nh_ofs = skb_network_offset(skb); 408 skb_pull_rcsum(skb, nh_ofs); 409 err = tcf_ct_handle_fragments(net, skb, family, p->zone); 410 if (err == -EINPROGRESS) { 411 retval = TC_ACT_STOLEN; 412 goto out; 413 } 414 if (err) 415 goto drop; 416 417 err = tcf_ct_skb_network_trim(skb, family); 418 if (err) 419 goto drop; 420 421 /* If we are recirculating packets to match on ct fields and 422 * committing with a separate ct action, then we don't need to 423 * actually run the packet through conntrack twice unless it's for a 424 * different zone. 425 */ 426 cached = tcf_ct_skb_nfct_cached(net, skb, p->zone, force); 427 if (!cached) { 428 /* Associate skb with specified zone. */ 429 if (tmpl) { 430 ct = nf_ct_get(skb, &ctinfo); 431 if (skb_nfct(skb)) 432 nf_conntrack_put(skb_nfct(skb)); 433 nf_conntrack_get(&tmpl->ct_general); 434 nf_ct_set(skb, tmpl, IP_CT_NEW); 435 } 436 437 state.hook = NF_INET_PRE_ROUTING; 438 state.net = net; 439 state.pf = family; 440 err = nf_conntrack_in(skb, &state); 441 if (err != NF_ACCEPT) 442 goto out_push; 443 } 444 445 ct = nf_ct_get(skb, &ctinfo); 446 if (!ct) 447 goto out_push; 448 nf_ct_deliver_cached_events(ct); 449 450 err = tcf_ct_act_nat(skb, ct, ctinfo, p->ct_action, &p->range, commit); 451 if (err != NF_ACCEPT) 452 goto drop; 453 454 if (commit) { 455 tcf_ct_act_set_mark(ct, p->mark, p->mark_mask); 456 tcf_ct_act_set_labels(ct, p->labels, p->labels_mask); 457 458 /* This will take care of sending queued events 459 * even if the connection is already confirmed. 460 */ 461 nf_conntrack_confirm(skb); 462 } 463 464 out_push: 465 skb_push_rcsum(skb, nh_ofs); 466 467 out: 468 tcf_action_update_bstats(&c->common, skb); 469 return retval; 470 471 drop: 472 tcf_action_inc_drop_qstats(&c->common); 473 return TC_ACT_SHOT; 474 } 475 476 static const struct nla_policy ct_policy[TCA_CT_MAX + 1] = { 477 [TCA_CT_ACTION] = { .type = NLA_U16 }, 478 [TCA_CT_PARMS] = { .type = NLA_EXACT_LEN, .len = sizeof(struct tc_ct) }, 479 [TCA_CT_ZONE] = { .type = NLA_U16 }, 480 [TCA_CT_MARK] = { .type = NLA_U32 }, 481 [TCA_CT_MARK_MASK] = { .type = NLA_U32 }, 482 [TCA_CT_LABELS] = { .type = NLA_BINARY, 483 .len = 128 / BITS_PER_BYTE }, 484 [TCA_CT_LABELS_MASK] = { .type = NLA_BINARY, 485 .len = 128 / BITS_PER_BYTE }, 486 [TCA_CT_NAT_IPV4_MIN] = { .type = NLA_U32 }, 487 [TCA_CT_NAT_IPV4_MAX] = { .type = NLA_U32 }, 488 [TCA_CT_NAT_IPV6_MIN] = { .type = NLA_EXACT_LEN, 489 .len = sizeof(struct in6_addr) }, 490 [TCA_CT_NAT_IPV6_MAX] = { .type = NLA_EXACT_LEN, 491 .len = sizeof(struct in6_addr) }, 492 [TCA_CT_NAT_PORT_MIN] = { .type = NLA_U16 }, 493 [TCA_CT_NAT_PORT_MAX] = { .type = NLA_U16 }, 494 }; 495 496 static int tcf_ct_fill_params_nat(struct tcf_ct_params *p, 497 struct tc_ct *parm, 498 struct nlattr **tb, 499 struct netlink_ext_ack *extack) 500 { 501 struct nf_nat_range2 *range; 502 503 if (!(p->ct_action & TCA_CT_ACT_NAT)) 504 return 0; 505 506 if (!IS_ENABLED(CONFIG_NF_NAT)) { 507 NL_SET_ERR_MSG_MOD(extack, "Netfilter nat isn't enabled in kernel"); 508 return -EOPNOTSUPP; 509 } 510 511 if (!(p->ct_action & (TCA_CT_ACT_NAT_SRC | TCA_CT_ACT_NAT_DST))) 512 return 0; 513 514 if ((p->ct_action & TCA_CT_ACT_NAT_SRC) && 515 (p->ct_action & TCA_CT_ACT_NAT_DST)) { 516 NL_SET_ERR_MSG_MOD(extack, "dnat and snat can't be enabled at the same time"); 517 return -EOPNOTSUPP; 518 } 519 520 range = &p->range; 521 if (tb[TCA_CT_NAT_IPV4_MIN]) { 522 struct nlattr *max_attr = tb[TCA_CT_NAT_IPV4_MAX]; 523 524 p->ipv4_range = true; 525 range->flags |= NF_NAT_RANGE_MAP_IPS; 526 range->min_addr.ip = 527 nla_get_in_addr(tb[TCA_CT_NAT_IPV4_MIN]); 528 529 range->max_addr.ip = max_attr ? 530 nla_get_in_addr(max_attr) : 531 range->min_addr.ip; 532 } else if (tb[TCA_CT_NAT_IPV6_MIN]) { 533 struct nlattr *max_attr = tb[TCA_CT_NAT_IPV6_MAX]; 534 535 p->ipv4_range = false; 536 range->flags |= NF_NAT_RANGE_MAP_IPS; 537 range->min_addr.in6 = 538 nla_get_in6_addr(tb[TCA_CT_NAT_IPV6_MIN]); 539 540 range->max_addr.in6 = max_attr ? 541 nla_get_in6_addr(max_attr) : 542 range->min_addr.in6; 543 } 544 545 if (tb[TCA_CT_NAT_PORT_MIN]) { 546 range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; 547 range->min_proto.all = nla_get_be16(tb[TCA_CT_NAT_PORT_MIN]); 548 549 range->max_proto.all = tb[TCA_CT_NAT_PORT_MAX] ? 550 nla_get_be16(tb[TCA_CT_NAT_PORT_MAX]) : 551 range->min_proto.all; 552 } 553 554 return 0; 555 } 556 557 static void tcf_ct_set_key_val(struct nlattr **tb, 558 void *val, int val_type, 559 void *mask, int mask_type, 560 int len) 561 { 562 if (!tb[val_type]) 563 return; 564 nla_memcpy(val, tb[val_type], len); 565 566 if (!mask) 567 return; 568 569 if (mask_type == TCA_CT_UNSPEC || !tb[mask_type]) 570 memset(mask, 0xff, len); 571 else 572 nla_memcpy(mask, tb[mask_type], len); 573 } 574 575 static int tcf_ct_fill_params(struct net *net, 576 struct tcf_ct_params *p, 577 struct tc_ct *parm, 578 struct nlattr **tb, 579 struct netlink_ext_ack *extack) 580 { 581 struct tc_ct_action_net *tn = net_generic(net, ct_net_id); 582 struct nf_conntrack_zone zone; 583 struct nf_conn *tmpl; 584 int err; 585 586 p->zone = NF_CT_DEFAULT_ZONE_ID; 587 588 tcf_ct_set_key_val(tb, 589 &p->ct_action, TCA_CT_ACTION, 590 NULL, TCA_CT_UNSPEC, 591 sizeof(p->ct_action)); 592 593 if (p->ct_action & TCA_CT_ACT_CLEAR) 594 return 0; 595 596 err = tcf_ct_fill_params_nat(p, parm, tb, extack); 597 if (err) 598 return err; 599 600 if (tb[TCA_CT_MARK]) { 601 if (!IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)) { 602 NL_SET_ERR_MSG_MOD(extack, "Conntrack mark isn't enabled."); 603 return -EOPNOTSUPP; 604 } 605 tcf_ct_set_key_val(tb, 606 &p->mark, TCA_CT_MARK, 607 &p->mark_mask, TCA_CT_MARK_MASK, 608 sizeof(p->mark)); 609 } 610 611 if (tb[TCA_CT_LABELS]) { 612 if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)) { 613 NL_SET_ERR_MSG_MOD(extack, "Conntrack labels isn't enabled."); 614 return -EOPNOTSUPP; 615 } 616 617 if (!tn->labels) { 618 NL_SET_ERR_MSG_MOD(extack, "Failed to set connlabel length"); 619 return -EOPNOTSUPP; 620 } 621 tcf_ct_set_key_val(tb, 622 p->labels, TCA_CT_LABELS, 623 p->labels_mask, TCA_CT_LABELS_MASK, 624 sizeof(p->labels)); 625 } 626 627 if (tb[TCA_CT_ZONE]) { 628 if (!IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES)) { 629 NL_SET_ERR_MSG_MOD(extack, "Conntrack zones isn't enabled."); 630 return -EOPNOTSUPP; 631 } 632 633 tcf_ct_set_key_val(tb, 634 &p->zone, TCA_CT_ZONE, 635 NULL, TCA_CT_UNSPEC, 636 sizeof(p->zone)); 637 } 638 639 if (p->zone == NF_CT_DEFAULT_ZONE_ID) 640 return 0; 641 642 nf_ct_zone_init(&zone, p->zone, NF_CT_DEFAULT_ZONE_DIR, 0); 643 tmpl = nf_ct_tmpl_alloc(net, &zone, GFP_KERNEL); 644 if (!tmpl) { 645 NL_SET_ERR_MSG_MOD(extack, "Failed to allocate conntrack template"); 646 return -ENOMEM; 647 } 648 __set_bit(IPS_CONFIRMED_BIT, &tmpl->status); 649 nf_conntrack_get(&tmpl->ct_general); 650 p->tmpl = tmpl; 651 652 return 0; 653 } 654 655 static int tcf_ct_init(struct net *net, struct nlattr *nla, 656 struct nlattr *est, struct tc_action **a, 657 int replace, int bind, bool rtnl_held, 658 struct tcf_proto *tp, u32 flags, 659 struct netlink_ext_ack *extack) 660 { 661 struct tc_action_net *tn = net_generic(net, ct_net_id); 662 struct tcf_ct_params *params = NULL; 663 struct nlattr *tb[TCA_CT_MAX + 1]; 664 struct tcf_chain *goto_ch = NULL; 665 struct tc_ct *parm; 666 struct tcf_ct *c; 667 int err, res = 0; 668 u32 index; 669 670 if (!nla) { 671 NL_SET_ERR_MSG_MOD(extack, "Ct requires attributes to be passed"); 672 return -EINVAL; 673 } 674 675 err = nla_parse_nested(tb, TCA_CT_MAX, nla, ct_policy, extack); 676 if (err < 0) 677 return err; 678 679 if (!tb[TCA_CT_PARMS]) { 680 NL_SET_ERR_MSG_MOD(extack, "Missing required ct parameters"); 681 return -EINVAL; 682 } 683 parm = nla_data(tb[TCA_CT_PARMS]); 684 index = parm->index; 685 err = tcf_idr_check_alloc(tn, &index, a, bind); 686 if (err < 0) 687 return err; 688 689 if (!err) { 690 err = tcf_idr_create_from_flags(tn, index, est, a, 691 &act_ct_ops, bind, flags); 692 if (err) { 693 tcf_idr_cleanup(tn, index); 694 return err; 695 } 696 res = ACT_P_CREATED; 697 } else { 698 if (bind) 699 return 0; 700 701 if (!replace) { 702 tcf_idr_release(*a, bind); 703 return -EEXIST; 704 } 705 } 706 err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); 707 if (err < 0) 708 goto cleanup; 709 710 c = to_ct(*a); 711 712 params = kzalloc(sizeof(*params), GFP_KERNEL); 713 if (unlikely(!params)) { 714 err = -ENOMEM; 715 goto cleanup; 716 } 717 718 err = tcf_ct_fill_params(net, params, parm, tb, extack); 719 if (err) 720 goto cleanup; 721 722 spin_lock_bh(&c->tcf_lock); 723 goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); 724 params = rcu_replace_pointer(c->params, params, 725 lockdep_is_held(&c->tcf_lock)); 726 spin_unlock_bh(&c->tcf_lock); 727 728 if (goto_ch) 729 tcf_chain_put_by_act(goto_ch); 730 if (params) 731 kfree_rcu(params, rcu); 732 if (res == ACT_P_CREATED) 733 tcf_idr_insert(tn, *a); 734 735 return res; 736 737 cleanup: 738 if (goto_ch) 739 tcf_chain_put_by_act(goto_ch); 740 kfree(params); 741 tcf_idr_release(*a, bind); 742 return err; 743 } 744 745 static void tcf_ct_cleanup(struct tc_action *a) 746 { 747 struct tcf_ct_params *params; 748 struct tcf_ct *c = to_ct(a); 749 750 params = rcu_dereference_protected(c->params, 1); 751 if (params) 752 call_rcu(¶ms->rcu, tcf_ct_params_free); 753 } 754 755 static int tcf_ct_dump_key_val(struct sk_buff *skb, 756 void *val, int val_type, 757 void *mask, int mask_type, 758 int len) 759 { 760 int err; 761 762 if (mask && !memchr_inv(mask, 0, len)) 763 return 0; 764 765 err = nla_put(skb, val_type, len, val); 766 if (err) 767 return err; 768 769 if (mask_type != TCA_CT_UNSPEC) { 770 err = nla_put(skb, mask_type, len, mask); 771 if (err) 772 return err; 773 } 774 775 return 0; 776 } 777 778 static int tcf_ct_dump_nat(struct sk_buff *skb, struct tcf_ct_params *p) 779 { 780 struct nf_nat_range2 *range = &p->range; 781 782 if (!(p->ct_action & TCA_CT_ACT_NAT)) 783 return 0; 784 785 if (!(p->ct_action & (TCA_CT_ACT_NAT_SRC | TCA_CT_ACT_NAT_DST))) 786 return 0; 787 788 if (range->flags & NF_NAT_RANGE_MAP_IPS) { 789 if (p->ipv4_range) { 790 if (nla_put_in_addr(skb, TCA_CT_NAT_IPV4_MIN, 791 range->min_addr.ip)) 792 return -1; 793 if (nla_put_in_addr(skb, TCA_CT_NAT_IPV4_MAX, 794 range->max_addr.ip)) 795 return -1; 796 } else { 797 if (nla_put_in6_addr(skb, TCA_CT_NAT_IPV6_MIN, 798 &range->min_addr.in6)) 799 return -1; 800 if (nla_put_in6_addr(skb, TCA_CT_NAT_IPV6_MAX, 801 &range->max_addr.in6)) 802 return -1; 803 } 804 } 805 806 if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) { 807 if (nla_put_be16(skb, TCA_CT_NAT_PORT_MIN, 808 range->min_proto.all)) 809 return -1; 810 if (nla_put_be16(skb, TCA_CT_NAT_PORT_MAX, 811 range->max_proto.all)) 812 return -1; 813 } 814 815 return 0; 816 } 817 818 static inline int tcf_ct_dump(struct sk_buff *skb, struct tc_action *a, 819 int bind, int ref) 820 { 821 unsigned char *b = skb_tail_pointer(skb); 822 struct tcf_ct *c = to_ct(a); 823 struct tcf_ct_params *p; 824 825 struct tc_ct opt = { 826 .index = c->tcf_index, 827 .refcnt = refcount_read(&c->tcf_refcnt) - ref, 828 .bindcnt = atomic_read(&c->tcf_bindcnt) - bind, 829 }; 830 struct tcf_t t; 831 832 spin_lock_bh(&c->tcf_lock); 833 p = rcu_dereference_protected(c->params, 834 lockdep_is_held(&c->tcf_lock)); 835 opt.action = c->tcf_action; 836 837 if (tcf_ct_dump_key_val(skb, 838 &p->ct_action, TCA_CT_ACTION, 839 NULL, TCA_CT_UNSPEC, 840 sizeof(p->ct_action))) 841 goto nla_put_failure; 842 843 if (p->ct_action & TCA_CT_ACT_CLEAR) 844 goto skip_dump; 845 846 if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && 847 tcf_ct_dump_key_val(skb, 848 &p->mark, TCA_CT_MARK, 849 &p->mark_mask, TCA_CT_MARK_MASK, 850 sizeof(p->mark))) 851 goto nla_put_failure; 852 853 if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && 854 tcf_ct_dump_key_val(skb, 855 p->labels, TCA_CT_LABELS, 856 p->labels_mask, TCA_CT_LABELS_MASK, 857 sizeof(p->labels))) 858 goto nla_put_failure; 859 860 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && 861 tcf_ct_dump_key_val(skb, 862 &p->zone, TCA_CT_ZONE, 863 NULL, TCA_CT_UNSPEC, 864 sizeof(p->zone))) 865 goto nla_put_failure; 866 867 if (tcf_ct_dump_nat(skb, p)) 868 goto nla_put_failure; 869 870 skip_dump: 871 if (nla_put(skb, TCA_CT_PARMS, sizeof(opt), &opt)) 872 goto nla_put_failure; 873 874 tcf_tm_dump(&t, &c->tcf_tm); 875 if (nla_put_64bit(skb, TCA_CT_TM, sizeof(t), &t, TCA_CT_PAD)) 876 goto nla_put_failure; 877 spin_unlock_bh(&c->tcf_lock); 878 879 return skb->len; 880 nla_put_failure: 881 spin_unlock_bh(&c->tcf_lock); 882 nlmsg_trim(skb, b); 883 return -1; 884 } 885 886 static int tcf_ct_walker(struct net *net, struct sk_buff *skb, 887 struct netlink_callback *cb, int type, 888 const struct tc_action_ops *ops, 889 struct netlink_ext_ack *extack) 890 { 891 struct tc_action_net *tn = net_generic(net, ct_net_id); 892 893 return tcf_generic_walker(tn, skb, cb, type, ops, extack); 894 } 895 896 static int tcf_ct_search(struct net *net, struct tc_action **a, u32 index) 897 { 898 struct tc_action_net *tn = net_generic(net, ct_net_id); 899 900 return tcf_idr_search(tn, a, index); 901 } 902 903 static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets, 904 u64 lastuse, bool hw) 905 { 906 struct tcf_ct *c = to_ct(a); 907 908 tcf_action_update_stats(a, bytes, packets, false, hw); 909 c->tcf_tm.lastuse = max_t(u64, c->tcf_tm.lastuse, lastuse); 910 } 911 912 static struct tc_action_ops act_ct_ops = { 913 .kind = "ct", 914 .id = TCA_ID_CT, 915 .owner = THIS_MODULE, 916 .act = tcf_ct_act, 917 .dump = tcf_ct_dump, 918 .init = tcf_ct_init, 919 .cleanup = tcf_ct_cleanup, 920 .walk = tcf_ct_walker, 921 .lookup = tcf_ct_search, 922 .stats_update = tcf_stats_update, 923 .size = sizeof(struct tcf_ct), 924 }; 925 926 static __net_init int ct_init_net(struct net *net) 927 { 928 unsigned int n_bits = FIELD_SIZEOF(struct tcf_ct_params, labels) * 8; 929 struct tc_ct_action_net *tn = net_generic(net, ct_net_id); 930 931 if (nf_connlabels_get(net, n_bits - 1)) { 932 tn->labels = false; 933 pr_err("act_ct: Failed to set connlabels length"); 934 } else { 935 tn->labels = true; 936 } 937 938 return tc_action_net_init(net, &tn->tn, &act_ct_ops); 939 } 940 941 static void __net_exit ct_exit_net(struct list_head *net_list) 942 { 943 struct net *net; 944 945 rtnl_lock(); 946 list_for_each_entry(net, net_list, exit_list) { 947 struct tc_ct_action_net *tn = net_generic(net, ct_net_id); 948 949 if (tn->labels) 950 nf_connlabels_put(net); 951 } 952 rtnl_unlock(); 953 954 tc_action_net_exit(net_list, ct_net_id); 955 } 956 957 static struct pernet_operations ct_net_ops = { 958 .init = ct_init_net, 959 .exit_batch = ct_exit_net, 960 .id = &ct_net_id, 961 .size = sizeof(struct tc_ct_action_net), 962 }; 963 964 static int __init ct_init_module(void) 965 { 966 return tcf_register_action(&act_ct_ops, &ct_net_ops); 967 } 968 969 static void __exit ct_cleanup_module(void) 970 { 971 tcf_unregister_action(&act_ct_ops, &ct_net_ops); 972 } 973 974 module_init(ct_init_module); 975 module_exit(ct_cleanup_module); 976 MODULE_AUTHOR("Paul Blakey <paulb@mellanox.com>"); 977 MODULE_AUTHOR("Yossi Kuperman <yossiku@mellanox.com>"); 978 MODULE_AUTHOR("Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>"); 979 MODULE_DESCRIPTION("Connection tracking action"); 980 MODULE_LICENSE("GPL v2"); 981 982