1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* - 3 * net/sched/act_ct.c Connection Tracking action 4 * 5 * Authors: Paul Blakey <paulb@mellanox.com> 6 * Yossi Kuperman <yossiku@mellanox.com> 7 * Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> 8 */ 9 10 #include <linux/module.h> 11 #include <linux/init.h> 12 #include <linux/kernel.h> 13 #include <linux/skbuff.h> 14 #include <linux/rtnetlink.h> 15 #include <linux/pkt_cls.h> 16 #include <linux/ip.h> 17 #include <linux/ipv6.h> 18 #include <net/netlink.h> 19 #include <net/pkt_sched.h> 20 #include <net/pkt_cls.h> 21 #include <net/act_api.h> 22 #include <net/ip.h> 23 #include <net/ipv6_frag.h> 24 #include <uapi/linux/tc_act/tc_ct.h> 25 #include <net/tc_act/tc_ct.h> 26 27 #include <net/netfilter/nf_conntrack.h> 28 #include <net/netfilter/nf_conntrack_core.h> 29 #include <net/netfilter/nf_conntrack_zones.h> 30 #include <net/netfilter/nf_conntrack_helper.h> 31 #include <net/netfilter/ipv6/nf_defrag_ipv6.h> 32 #include <uapi/linux/netfilter/nf_nat.h> 33 34 static struct tc_action_ops act_ct_ops; 35 static unsigned int ct_net_id; 36 37 struct tc_ct_action_net { 38 struct tc_action_net tn; /* Must be first */ 39 bool labels; 40 }; 41 42 /* Determine whether skb->_nfct is equal to the result of conntrack lookup. */ 43 static bool tcf_ct_skb_nfct_cached(struct net *net, struct sk_buff *skb, 44 u16 zone_id, bool force) 45 { 46 enum ip_conntrack_info ctinfo; 47 struct nf_conn *ct; 48 49 ct = nf_ct_get(skb, &ctinfo); 50 if (!ct) 51 return false; 52 if (!net_eq(net, read_pnet(&ct->ct_net))) 53 return false; 54 if (nf_ct_zone(ct)->id != zone_id) 55 return false; 56 57 /* Force conntrack entry direction. */ 58 if (force && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) { 59 if (nf_ct_is_confirmed(ct)) 60 nf_ct_kill(ct); 61 62 nf_conntrack_put(&ct->ct_general); 63 nf_ct_set(skb, NULL, IP_CT_UNTRACKED); 64 65 return false; 66 } 67 68 return true; 69 } 70 71 /* Trim the skb to the length specified by the IP/IPv6 header, 72 * removing any trailing lower-layer padding. This prepares the skb 73 * for higher-layer processing that assumes skb->len excludes padding 74 * (such as nf_ip_checksum). The caller needs to pull the skb to the 75 * network header, and ensure ip_hdr/ipv6_hdr points to valid data. 76 */ 77 static int tcf_ct_skb_network_trim(struct sk_buff *skb, int family) 78 { 79 unsigned int len; 80 int err; 81 82 switch (family) { 83 case NFPROTO_IPV4: 84 len = ntohs(ip_hdr(skb)->tot_len); 85 break; 86 case NFPROTO_IPV6: 87 len = sizeof(struct ipv6hdr) 88 + ntohs(ipv6_hdr(skb)->payload_len); 89 break; 90 default: 91 len = skb->len; 92 } 93 94 err = pskb_trim_rcsum(skb, len); 95 96 return err; 97 } 98 99 static u8 tcf_ct_skb_nf_family(struct sk_buff *skb) 100 { 101 u8 family = NFPROTO_UNSPEC; 102 103 switch (skb->protocol) { 104 case htons(ETH_P_IP): 105 family = NFPROTO_IPV4; 106 break; 107 case htons(ETH_P_IPV6): 108 family = NFPROTO_IPV6; 109 break; 110 default: 111 break; 112 } 113 114 return family; 115 } 116 117 static int tcf_ct_ipv4_is_fragment(struct sk_buff *skb, bool *frag) 118 { 119 unsigned int len; 120 121 len = skb_network_offset(skb) + sizeof(struct iphdr); 122 if (unlikely(skb->len < len)) 123 return -EINVAL; 124 if (unlikely(!pskb_may_pull(skb, len))) 125 return -ENOMEM; 126 127 *frag = ip_is_fragment(ip_hdr(skb)); 128 return 0; 129 } 130 131 static int tcf_ct_ipv6_is_fragment(struct sk_buff *skb, bool *frag) 132 { 133 unsigned int flags = 0, len, payload_ofs = 0; 134 unsigned short frag_off; 135 int nexthdr; 136 137 len = skb_network_offset(skb) + sizeof(struct ipv6hdr); 138 if (unlikely(skb->len < len)) 139 return -EINVAL; 140 if (unlikely(!pskb_may_pull(skb, len))) 141 return -ENOMEM; 142 143 nexthdr = ipv6_find_hdr(skb, &payload_ofs, -1, &frag_off, &flags); 144 if (unlikely(nexthdr < 0)) 145 return -EPROTO; 146 147 *frag = flags & IP6_FH_F_FRAG; 148 return 0; 149 } 150 151 static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, 152 u8 family, u16 zone) 153 { 154 enum ip_conntrack_info ctinfo; 155 struct nf_conn *ct; 156 int err = 0; 157 bool frag; 158 159 /* Previously seen (loopback)? Ignore. */ 160 ct = nf_ct_get(skb, &ctinfo); 161 if ((ct && !nf_ct_is_template(ct)) || ctinfo == IP_CT_UNTRACKED) 162 return 0; 163 164 if (family == NFPROTO_IPV4) 165 err = tcf_ct_ipv4_is_fragment(skb, &frag); 166 else 167 err = tcf_ct_ipv6_is_fragment(skb, &frag); 168 if (err || !frag) 169 return err; 170 171 skb_get(skb); 172 173 if (family == NFPROTO_IPV4) { 174 enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone; 175 176 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); 177 local_bh_disable(); 178 err = ip_defrag(net, skb, user); 179 local_bh_enable(); 180 if (err && err != -EINPROGRESS) 181 goto out_free; 182 } else { /* NFPROTO_IPV6 */ 183 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) 184 enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; 185 186 memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); 187 err = nf_ct_frag6_gather(net, skb, user); 188 if (err && err != -EINPROGRESS) 189 goto out_free; 190 #else 191 err = -EOPNOTSUPP; 192 goto out_free; 193 #endif 194 } 195 196 skb_clear_hash(skb); 197 skb->ignore_df = 1; 198 return err; 199 200 out_free: 201 kfree_skb(skb); 202 return err; 203 } 204 205 static void tcf_ct_params_free(struct rcu_head *head) 206 { 207 struct tcf_ct_params *params = container_of(head, 208 struct tcf_ct_params, rcu); 209 210 if (params->tmpl) 211 nf_conntrack_put(¶ms->tmpl->ct_general); 212 kfree(params); 213 } 214 215 #if IS_ENABLED(CONFIG_NF_NAT) 216 /* Modelled after nf_nat_ipv[46]_fn(). 217 * range is only used for new, uninitialized NAT state. 218 * Returns either NF_ACCEPT or NF_DROP. 219 */ 220 static int ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, 221 enum ip_conntrack_info ctinfo, 222 const struct nf_nat_range2 *range, 223 enum nf_nat_manip_type maniptype) 224 { 225 int hooknum, err = NF_ACCEPT; 226 227 /* See HOOK2MANIP(). */ 228 if (maniptype == NF_NAT_MANIP_SRC) 229 hooknum = NF_INET_LOCAL_IN; /* Source NAT */ 230 else 231 hooknum = NF_INET_LOCAL_OUT; /* Destination NAT */ 232 233 switch (ctinfo) { 234 case IP_CT_RELATED: 235 case IP_CT_RELATED_REPLY: 236 if (skb->protocol == htons(ETH_P_IP) && 237 ip_hdr(skb)->protocol == IPPROTO_ICMP) { 238 if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, 239 hooknum)) 240 err = NF_DROP; 241 goto out; 242 } else if (IS_ENABLED(CONFIG_IPV6) && 243 skb->protocol == htons(ETH_P_IPV6)) { 244 __be16 frag_off; 245 u8 nexthdr = ipv6_hdr(skb)->nexthdr; 246 int hdrlen = ipv6_skip_exthdr(skb, 247 sizeof(struct ipv6hdr), 248 &nexthdr, &frag_off); 249 250 if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { 251 if (!nf_nat_icmpv6_reply_translation(skb, ct, 252 ctinfo, 253 hooknum, 254 hdrlen)) 255 err = NF_DROP; 256 goto out; 257 } 258 } 259 /* Non-ICMP, fall thru to initialize if needed. */ 260 /* fall through */ 261 case IP_CT_NEW: 262 /* Seen it before? This can happen for loopback, retrans, 263 * or local packets. 264 */ 265 if (!nf_nat_initialized(ct, maniptype)) { 266 /* Initialize according to the NAT action. */ 267 err = (range && range->flags & NF_NAT_RANGE_MAP_IPS) 268 /* Action is set up to establish a new 269 * mapping. 270 */ 271 ? nf_nat_setup_info(ct, range, maniptype) 272 : nf_nat_alloc_null_binding(ct, hooknum); 273 if (err != NF_ACCEPT) 274 goto out; 275 } 276 break; 277 278 case IP_CT_ESTABLISHED: 279 case IP_CT_ESTABLISHED_REPLY: 280 break; 281 282 default: 283 err = NF_DROP; 284 goto out; 285 } 286 287 err = nf_nat_packet(ct, ctinfo, hooknum, skb); 288 out: 289 return err; 290 } 291 #endif /* CONFIG_NF_NAT */ 292 293 static void tcf_ct_act_set_mark(struct nf_conn *ct, u32 mark, u32 mask) 294 { 295 #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) 296 u32 new_mark; 297 298 if (!mask) 299 return; 300 301 new_mark = mark | (ct->mark & ~(mask)); 302 if (ct->mark != new_mark) { 303 ct->mark = new_mark; 304 if (nf_ct_is_confirmed(ct)) 305 nf_conntrack_event_cache(IPCT_MARK, ct); 306 } 307 #endif 308 } 309 310 static void tcf_ct_act_set_labels(struct nf_conn *ct, 311 u32 *labels, 312 u32 *labels_m) 313 { 314 #if IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) 315 size_t labels_sz = FIELD_SIZEOF(struct tcf_ct_params, labels); 316 317 if (!memchr_inv(labels_m, 0, labels_sz)) 318 return; 319 320 nf_connlabels_replace(ct, labels, labels_m, 4); 321 #endif 322 } 323 324 static int tcf_ct_act_nat(struct sk_buff *skb, 325 struct nf_conn *ct, 326 enum ip_conntrack_info ctinfo, 327 int ct_action, 328 struct nf_nat_range2 *range, 329 bool commit) 330 { 331 #if IS_ENABLED(CONFIG_NF_NAT) 332 enum nf_nat_manip_type maniptype; 333 334 if (!(ct_action & TCA_CT_ACT_NAT)) 335 return NF_ACCEPT; 336 337 /* Add NAT extension if not confirmed yet. */ 338 if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct)) 339 return NF_DROP; /* Can't NAT. */ 340 341 if (ctinfo != IP_CT_NEW && (ct->status & IPS_NAT_MASK) && 342 (ctinfo != IP_CT_RELATED || commit)) { 343 /* NAT an established or related connection like before. */ 344 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) 345 /* This is the REPLY direction for a connection 346 * for which NAT was applied in the forward 347 * direction. Do the reverse NAT. 348 */ 349 maniptype = ct->status & IPS_SRC_NAT 350 ? NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC; 351 else 352 maniptype = ct->status & IPS_SRC_NAT 353 ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST; 354 } else if (ct_action & TCA_CT_ACT_NAT_SRC) { 355 maniptype = NF_NAT_MANIP_SRC; 356 } else if (ct_action & TCA_CT_ACT_NAT_DST) { 357 maniptype = NF_NAT_MANIP_DST; 358 } else { 359 return NF_ACCEPT; 360 } 361 362 return ct_nat_execute(skb, ct, ctinfo, range, maniptype); 363 #else 364 return NF_ACCEPT; 365 #endif 366 } 367 368 static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, 369 struct tcf_result *res) 370 { 371 struct net *net = dev_net(skb->dev); 372 bool cached, commit, clear, force; 373 enum ip_conntrack_info ctinfo; 374 struct tcf_ct *c = to_ct(a); 375 struct nf_conn *tmpl = NULL; 376 struct nf_hook_state state; 377 int nh_ofs, err, retval; 378 struct tcf_ct_params *p; 379 struct nf_conn *ct; 380 u8 family; 381 382 p = rcu_dereference_bh(c->params); 383 384 retval = READ_ONCE(c->tcf_action); 385 commit = p->ct_action & TCA_CT_ACT_COMMIT; 386 clear = p->ct_action & TCA_CT_ACT_CLEAR; 387 force = p->ct_action & TCA_CT_ACT_FORCE; 388 tmpl = p->tmpl; 389 390 if (clear) { 391 ct = nf_ct_get(skb, &ctinfo); 392 if (ct) { 393 nf_conntrack_put(&ct->ct_general); 394 nf_ct_set(skb, NULL, IP_CT_UNTRACKED); 395 } 396 397 goto out; 398 } 399 400 family = tcf_ct_skb_nf_family(skb); 401 if (family == NFPROTO_UNSPEC) 402 goto drop; 403 404 /* The conntrack module expects to be working at L3. 405 * We also try to pull the IPv4/6 header to linear area 406 */ 407 nh_ofs = skb_network_offset(skb); 408 skb_pull_rcsum(skb, nh_ofs); 409 err = tcf_ct_handle_fragments(net, skb, family, p->zone); 410 if (err == -EINPROGRESS) { 411 retval = TC_ACT_STOLEN; 412 goto out; 413 } 414 if (err) 415 goto drop; 416 417 err = tcf_ct_skb_network_trim(skb, family); 418 if (err) 419 goto drop; 420 421 /* If we are recirculating packets to match on ct fields and 422 * committing with a separate ct action, then we don't need to 423 * actually run the packet through conntrack twice unless it's for a 424 * different zone. 425 */ 426 cached = tcf_ct_skb_nfct_cached(net, skb, p->zone, force); 427 if (!cached) { 428 /* Associate skb with specified zone. */ 429 if (tmpl) { 430 ct = nf_ct_get(skb, &ctinfo); 431 if (skb_nfct(skb)) 432 nf_conntrack_put(skb_nfct(skb)); 433 nf_conntrack_get(&tmpl->ct_general); 434 nf_ct_set(skb, tmpl, IP_CT_NEW); 435 } 436 437 state.hook = NF_INET_PRE_ROUTING; 438 state.net = net; 439 state.pf = family; 440 err = nf_conntrack_in(skb, &state); 441 if (err != NF_ACCEPT) 442 goto out_push; 443 } 444 445 ct = nf_ct_get(skb, &ctinfo); 446 if (!ct) 447 goto out_push; 448 nf_ct_deliver_cached_events(ct); 449 450 err = tcf_ct_act_nat(skb, ct, ctinfo, p->ct_action, &p->range, commit); 451 if (err != NF_ACCEPT) 452 goto drop; 453 454 if (commit) { 455 tcf_ct_act_set_mark(ct, p->mark, p->mark_mask); 456 tcf_ct_act_set_labels(ct, p->labels, p->labels_mask); 457 458 /* This will take care of sending queued events 459 * even if the connection is already confirmed. 460 */ 461 nf_conntrack_confirm(skb); 462 } 463 464 out_push: 465 skb_push_rcsum(skb, nh_ofs); 466 467 out: 468 bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), skb); 469 return retval; 470 471 drop: 472 qstats_drop_inc(this_cpu_ptr(a->cpu_qstats)); 473 return TC_ACT_SHOT; 474 } 475 476 static const struct nla_policy ct_policy[TCA_CT_MAX + 1] = { 477 [TCA_CT_UNSPEC] = { .strict_start_type = TCA_CT_UNSPEC + 1 }, 478 [TCA_CT_ACTION] = { .type = NLA_U16 }, 479 [TCA_CT_PARMS] = { .type = NLA_EXACT_LEN, .len = sizeof(struct tc_ct) }, 480 [TCA_CT_ZONE] = { .type = NLA_U16 }, 481 [TCA_CT_MARK] = { .type = NLA_U32 }, 482 [TCA_CT_MARK_MASK] = { .type = NLA_U32 }, 483 [TCA_CT_LABELS] = { .type = NLA_BINARY, 484 .len = 128 / BITS_PER_BYTE }, 485 [TCA_CT_LABELS_MASK] = { .type = NLA_BINARY, 486 .len = 128 / BITS_PER_BYTE }, 487 [TCA_CT_NAT_IPV4_MIN] = { .type = NLA_U32 }, 488 [TCA_CT_NAT_IPV4_MAX] = { .type = NLA_U32 }, 489 [TCA_CT_NAT_IPV6_MIN] = { .type = NLA_EXACT_LEN, 490 .len = sizeof(struct in6_addr) }, 491 [TCA_CT_NAT_IPV6_MAX] = { .type = NLA_EXACT_LEN, 492 .len = sizeof(struct in6_addr) }, 493 [TCA_CT_NAT_PORT_MIN] = { .type = NLA_U16 }, 494 [TCA_CT_NAT_PORT_MAX] = { .type = NLA_U16 }, 495 }; 496 497 static int tcf_ct_fill_params_nat(struct tcf_ct_params *p, 498 struct tc_ct *parm, 499 struct nlattr **tb, 500 struct netlink_ext_ack *extack) 501 { 502 struct nf_nat_range2 *range; 503 504 if (!(p->ct_action & TCA_CT_ACT_NAT)) 505 return 0; 506 507 if (!IS_ENABLED(CONFIG_NF_NAT)) { 508 NL_SET_ERR_MSG_MOD(extack, "Netfilter nat isn't enabled in kernel"); 509 return -EOPNOTSUPP; 510 } 511 512 if (!(p->ct_action & (TCA_CT_ACT_NAT_SRC | TCA_CT_ACT_NAT_DST))) 513 return 0; 514 515 if ((p->ct_action & TCA_CT_ACT_NAT_SRC) && 516 (p->ct_action & TCA_CT_ACT_NAT_DST)) { 517 NL_SET_ERR_MSG_MOD(extack, "dnat and snat can't be enabled at the same time"); 518 return -EOPNOTSUPP; 519 } 520 521 range = &p->range; 522 if (tb[TCA_CT_NAT_IPV4_MIN]) { 523 struct nlattr *max_attr = tb[TCA_CT_NAT_IPV4_MAX]; 524 525 p->ipv4_range = true; 526 range->flags |= NF_NAT_RANGE_MAP_IPS; 527 range->min_addr.ip = 528 nla_get_in_addr(tb[TCA_CT_NAT_IPV4_MIN]); 529 530 range->max_addr.ip = max_attr ? 531 nla_get_in_addr(max_attr) : 532 range->min_addr.ip; 533 } else if (tb[TCA_CT_NAT_IPV6_MIN]) { 534 struct nlattr *max_attr = tb[TCA_CT_NAT_IPV6_MAX]; 535 536 p->ipv4_range = false; 537 range->flags |= NF_NAT_RANGE_MAP_IPS; 538 range->min_addr.in6 = 539 nla_get_in6_addr(tb[TCA_CT_NAT_IPV6_MIN]); 540 541 range->max_addr.in6 = max_attr ? 542 nla_get_in6_addr(max_attr) : 543 range->min_addr.in6; 544 } 545 546 if (tb[TCA_CT_NAT_PORT_MIN]) { 547 range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; 548 range->min_proto.all = nla_get_be16(tb[TCA_CT_NAT_PORT_MIN]); 549 550 range->max_proto.all = tb[TCA_CT_NAT_PORT_MAX] ? 551 nla_get_be16(tb[TCA_CT_NAT_PORT_MAX]) : 552 range->min_proto.all; 553 } 554 555 return 0; 556 } 557 558 static void tcf_ct_set_key_val(struct nlattr **tb, 559 void *val, int val_type, 560 void *mask, int mask_type, 561 int len) 562 { 563 if (!tb[val_type]) 564 return; 565 nla_memcpy(val, tb[val_type], len); 566 567 if (!mask) 568 return; 569 570 if (mask_type == TCA_CT_UNSPEC || !tb[mask_type]) 571 memset(mask, 0xff, len); 572 else 573 nla_memcpy(mask, tb[mask_type], len); 574 } 575 576 static int tcf_ct_fill_params(struct net *net, 577 struct tcf_ct_params *p, 578 struct tc_ct *parm, 579 struct nlattr **tb, 580 struct netlink_ext_ack *extack) 581 { 582 struct tc_ct_action_net *tn = net_generic(net, ct_net_id); 583 struct nf_conntrack_zone zone; 584 struct nf_conn *tmpl; 585 int err; 586 587 p->zone = NF_CT_DEFAULT_ZONE_ID; 588 589 tcf_ct_set_key_val(tb, 590 &p->ct_action, TCA_CT_ACTION, 591 NULL, TCA_CT_UNSPEC, 592 sizeof(p->ct_action)); 593 594 if (p->ct_action & TCA_CT_ACT_CLEAR) 595 return 0; 596 597 err = tcf_ct_fill_params_nat(p, parm, tb, extack); 598 if (err) 599 return err; 600 601 if (tb[TCA_CT_MARK]) { 602 if (!IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)) { 603 NL_SET_ERR_MSG_MOD(extack, "Conntrack mark isn't enabled."); 604 return -EOPNOTSUPP; 605 } 606 tcf_ct_set_key_val(tb, 607 &p->mark, TCA_CT_MARK, 608 &p->mark_mask, TCA_CT_MARK_MASK, 609 sizeof(p->mark)); 610 } 611 612 if (tb[TCA_CT_LABELS]) { 613 if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)) { 614 NL_SET_ERR_MSG_MOD(extack, "Conntrack labels isn't enabled."); 615 return -EOPNOTSUPP; 616 } 617 618 if (!tn->labels) { 619 NL_SET_ERR_MSG_MOD(extack, "Failed to set connlabel length"); 620 return -EOPNOTSUPP; 621 } 622 tcf_ct_set_key_val(tb, 623 p->labels, TCA_CT_LABELS, 624 p->labels_mask, TCA_CT_LABELS_MASK, 625 sizeof(p->labels)); 626 } 627 628 if (tb[TCA_CT_ZONE]) { 629 if (!IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES)) { 630 NL_SET_ERR_MSG_MOD(extack, "Conntrack zones isn't enabled."); 631 return -EOPNOTSUPP; 632 } 633 634 tcf_ct_set_key_val(tb, 635 &p->zone, TCA_CT_ZONE, 636 NULL, TCA_CT_UNSPEC, 637 sizeof(p->zone)); 638 } 639 640 if (p->zone == NF_CT_DEFAULT_ZONE_ID) 641 return 0; 642 643 nf_ct_zone_init(&zone, p->zone, NF_CT_DEFAULT_ZONE_DIR, 0); 644 tmpl = nf_ct_tmpl_alloc(net, &zone, GFP_KERNEL); 645 if (!tmpl) { 646 NL_SET_ERR_MSG_MOD(extack, "Failed to allocate conntrack template"); 647 return -ENOMEM; 648 } 649 __set_bit(IPS_CONFIRMED_BIT, &tmpl->status); 650 nf_conntrack_get(&tmpl->ct_general); 651 p->tmpl = tmpl; 652 653 return 0; 654 } 655 656 static int tcf_ct_init(struct net *net, struct nlattr *nla, 657 struct nlattr *est, struct tc_action **a, 658 int replace, int bind, bool rtnl_held, 659 struct tcf_proto *tp, 660 struct netlink_ext_ack *extack) 661 { 662 struct tc_action_net *tn = net_generic(net, ct_net_id); 663 struct tcf_ct_params *params = NULL; 664 struct nlattr *tb[TCA_CT_MAX + 1]; 665 struct tcf_chain *goto_ch = NULL; 666 struct tc_ct *parm; 667 struct tcf_ct *c; 668 int err, res = 0; 669 u32 index; 670 671 if (!nla) { 672 NL_SET_ERR_MSG_MOD(extack, "Ct requires attributes to be passed"); 673 return -EINVAL; 674 } 675 676 err = nla_parse_nested(tb, TCA_CT_MAX, nla, ct_policy, extack); 677 if (err < 0) 678 return err; 679 680 if (!tb[TCA_CT_PARMS]) { 681 NL_SET_ERR_MSG_MOD(extack, "Missing required ct parameters"); 682 return -EINVAL; 683 } 684 parm = nla_data(tb[TCA_CT_PARMS]); 685 index = parm->index; 686 err = tcf_idr_check_alloc(tn, &index, a, bind); 687 if (err < 0) 688 return err; 689 690 if (!err) { 691 err = tcf_idr_create(tn, index, est, a, 692 &act_ct_ops, bind, true); 693 if (err) { 694 tcf_idr_cleanup(tn, index); 695 return err; 696 } 697 res = ACT_P_CREATED; 698 } else { 699 if (bind) 700 return 0; 701 702 if (!replace) { 703 tcf_idr_release(*a, bind); 704 return -EEXIST; 705 } 706 } 707 err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); 708 if (err < 0) 709 goto cleanup; 710 711 c = to_ct(*a); 712 713 params = kzalloc(sizeof(*params), GFP_KERNEL); 714 if (unlikely(!params)) { 715 err = -ENOMEM; 716 goto cleanup; 717 } 718 719 err = tcf_ct_fill_params(net, params, parm, tb, extack); 720 if (err) 721 goto cleanup; 722 723 spin_lock_bh(&c->tcf_lock); 724 goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); 725 rcu_swap_protected(c->params, params, lockdep_is_held(&c->tcf_lock)); 726 spin_unlock_bh(&c->tcf_lock); 727 728 if (goto_ch) 729 tcf_chain_put_by_act(goto_ch); 730 if (params) 731 kfree_rcu(params, rcu); 732 if (res == ACT_P_CREATED) 733 tcf_idr_insert(tn, *a); 734 735 return res; 736 737 cleanup: 738 if (goto_ch) 739 tcf_chain_put_by_act(goto_ch); 740 kfree(params); 741 tcf_idr_release(*a, bind); 742 return err; 743 } 744 745 static void tcf_ct_cleanup(struct tc_action *a) 746 { 747 struct tcf_ct_params *params; 748 struct tcf_ct *c = to_ct(a); 749 750 params = rcu_dereference_protected(c->params, 1); 751 if (params) 752 call_rcu(¶ms->rcu, tcf_ct_params_free); 753 } 754 755 static int tcf_ct_dump_key_val(struct sk_buff *skb, 756 void *val, int val_type, 757 void *mask, int mask_type, 758 int len) 759 { 760 int err; 761 762 if (mask && !memchr_inv(mask, 0, len)) 763 return 0; 764 765 err = nla_put(skb, val_type, len, val); 766 if (err) 767 return err; 768 769 if (mask_type != TCA_CT_UNSPEC) { 770 err = nla_put(skb, mask_type, len, mask); 771 if (err) 772 return err; 773 } 774 775 return 0; 776 } 777 778 static int tcf_ct_dump_nat(struct sk_buff *skb, struct tcf_ct_params *p) 779 { 780 struct nf_nat_range2 *range = &p->range; 781 782 if (!(p->ct_action & TCA_CT_ACT_NAT)) 783 return 0; 784 785 if (!(p->ct_action & (TCA_CT_ACT_NAT_SRC | TCA_CT_ACT_NAT_DST))) 786 return 0; 787 788 if (range->flags & NF_NAT_RANGE_MAP_IPS) { 789 if (p->ipv4_range) { 790 if (nla_put_in_addr(skb, TCA_CT_NAT_IPV4_MIN, 791 range->min_addr.ip)) 792 return -1; 793 if (nla_put_in_addr(skb, TCA_CT_NAT_IPV4_MAX, 794 range->max_addr.ip)) 795 return -1; 796 } else { 797 if (nla_put_in6_addr(skb, TCA_CT_NAT_IPV6_MIN, 798 &range->min_addr.in6)) 799 return -1; 800 if (nla_put_in6_addr(skb, TCA_CT_NAT_IPV6_MAX, 801 &range->max_addr.in6)) 802 return -1; 803 } 804 } 805 806 if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) { 807 if (nla_put_be16(skb, TCA_CT_NAT_PORT_MIN, 808 range->min_proto.all)) 809 return -1; 810 if (nla_put_be16(skb, TCA_CT_NAT_PORT_MAX, 811 range->max_proto.all)) 812 return -1; 813 } 814 815 return 0; 816 } 817 818 static inline int tcf_ct_dump(struct sk_buff *skb, struct tc_action *a, 819 int bind, int ref) 820 { 821 unsigned char *b = skb_tail_pointer(skb); 822 struct tcf_ct *c = to_ct(a); 823 struct tcf_ct_params *p; 824 825 struct tc_ct opt = { 826 .index = c->tcf_index, 827 .refcnt = refcount_read(&c->tcf_refcnt) - ref, 828 .bindcnt = atomic_read(&c->tcf_bindcnt) - bind, 829 }; 830 struct tcf_t t; 831 832 spin_lock_bh(&c->tcf_lock); 833 p = rcu_dereference_protected(c->params, 834 lockdep_is_held(&c->tcf_lock)); 835 opt.action = c->tcf_action; 836 837 if (tcf_ct_dump_key_val(skb, 838 &p->ct_action, TCA_CT_ACTION, 839 NULL, TCA_CT_UNSPEC, 840 sizeof(p->ct_action))) 841 goto nla_put_failure; 842 843 if (p->ct_action & TCA_CT_ACT_CLEAR) 844 goto skip_dump; 845 846 if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && 847 tcf_ct_dump_key_val(skb, 848 &p->mark, TCA_CT_MARK, 849 &p->mark_mask, TCA_CT_MARK_MASK, 850 sizeof(p->mark))) 851 goto nla_put_failure; 852 853 if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && 854 tcf_ct_dump_key_val(skb, 855 p->labels, TCA_CT_LABELS, 856 p->labels_mask, TCA_CT_LABELS_MASK, 857 sizeof(p->labels))) 858 goto nla_put_failure; 859 860 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && 861 tcf_ct_dump_key_val(skb, 862 &p->zone, TCA_CT_ZONE, 863 NULL, TCA_CT_UNSPEC, 864 sizeof(p->zone))) 865 goto nla_put_failure; 866 867 if (tcf_ct_dump_nat(skb, p)) 868 goto nla_put_failure; 869 870 skip_dump: 871 if (nla_put(skb, TCA_CT_PARMS, sizeof(opt), &opt)) 872 goto nla_put_failure; 873 874 tcf_tm_dump(&t, &c->tcf_tm); 875 if (nla_put_64bit(skb, TCA_CT_TM, sizeof(t), &t, TCA_CT_PAD)) 876 goto nla_put_failure; 877 spin_unlock_bh(&c->tcf_lock); 878 879 return skb->len; 880 nla_put_failure: 881 spin_unlock_bh(&c->tcf_lock); 882 nlmsg_trim(skb, b); 883 return -1; 884 } 885 886 static int tcf_ct_walker(struct net *net, struct sk_buff *skb, 887 struct netlink_callback *cb, int type, 888 const struct tc_action_ops *ops, 889 struct netlink_ext_ack *extack) 890 { 891 struct tc_action_net *tn = net_generic(net, ct_net_id); 892 893 return tcf_generic_walker(tn, skb, cb, type, ops, extack); 894 } 895 896 static int tcf_ct_search(struct net *net, struct tc_action **a, u32 index) 897 { 898 struct tc_action_net *tn = net_generic(net, ct_net_id); 899 900 return tcf_idr_search(tn, a, index); 901 } 902 903 static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets, 904 u64 lastuse, bool hw) 905 { 906 struct tcf_ct *c = to_ct(a); 907 908 _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); 909 910 if (hw) 911 _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw), 912 bytes, packets); 913 c->tcf_tm.lastuse = max_t(u64, c->tcf_tm.lastuse, lastuse); 914 } 915 916 static struct tc_action_ops act_ct_ops = { 917 .kind = "ct", 918 .id = TCA_ID_CT, 919 .owner = THIS_MODULE, 920 .act = tcf_ct_act, 921 .dump = tcf_ct_dump, 922 .init = tcf_ct_init, 923 .cleanup = tcf_ct_cleanup, 924 .walk = tcf_ct_walker, 925 .lookup = tcf_ct_search, 926 .stats_update = tcf_stats_update, 927 .size = sizeof(struct tcf_ct), 928 }; 929 930 static __net_init int ct_init_net(struct net *net) 931 { 932 unsigned int n_bits = FIELD_SIZEOF(struct tcf_ct_params, labels) * 8; 933 struct tc_ct_action_net *tn = net_generic(net, ct_net_id); 934 935 if (nf_connlabels_get(net, n_bits - 1)) { 936 tn->labels = false; 937 pr_err("act_ct: Failed to set connlabels length"); 938 } else { 939 tn->labels = true; 940 } 941 942 return tc_action_net_init(net, &tn->tn, &act_ct_ops); 943 } 944 945 static void __net_exit ct_exit_net(struct list_head *net_list) 946 { 947 struct net *net; 948 949 rtnl_lock(); 950 list_for_each_entry(net, net_list, exit_list) { 951 struct tc_ct_action_net *tn = net_generic(net, ct_net_id); 952 953 if (tn->labels) 954 nf_connlabels_put(net); 955 } 956 rtnl_unlock(); 957 958 tc_action_net_exit(net_list, ct_net_id); 959 } 960 961 static struct pernet_operations ct_net_ops = { 962 .init = ct_init_net, 963 .exit_batch = ct_exit_net, 964 .id = &ct_net_id, 965 .size = sizeof(struct tc_ct_action_net), 966 }; 967 968 static int __init ct_init_module(void) 969 { 970 return tcf_register_action(&act_ct_ops, &ct_net_ops); 971 } 972 973 static void __exit ct_cleanup_module(void) 974 { 975 tcf_unregister_action(&act_ct_ops, &ct_net_ops); 976 } 977 978 module_init(ct_init_module); 979 module_exit(ct_cleanup_module); 980 MODULE_AUTHOR("Paul Blakey <paulb@mellanox.com>"); 981 MODULE_AUTHOR("Yossi Kuperman <yossiku@mellanox.com>"); 982 MODULE_AUTHOR("Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>"); 983 MODULE_DESCRIPTION("Connection tracking action"); 984 MODULE_LICENSE("GPL v2"); 985 986