1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (c) 2015 Nicira, Inc. 4 */ 5 6 #include <linux/module.h> 7 #include <linux/openvswitch.h> 8 #include <linux/tcp.h> 9 #include <linux/udp.h> 10 #include <linux/sctp.h> 11 #include <linux/static_key.h> 12 #include <linux/string_helpers.h> 13 #include <net/ip.h> 14 #include <net/genetlink.h> 15 #include <net/netfilter/nf_conntrack_core.h> 16 #include <net/netfilter/nf_conntrack_count.h> 17 #include <net/netfilter/nf_conntrack_helper.h> 18 #include <net/netfilter/nf_conntrack_labels.h> 19 #include <net/netfilter/nf_conntrack_seqadj.h> 20 #include <net/netfilter/nf_conntrack_timeout.h> 21 #include <net/netfilter/nf_conntrack_zones.h> 22 #include <net/netfilter/ipv6/nf_defrag_ipv6.h> 23 #include <net/ipv6_frag.h> 24 25 #if IS_ENABLED(CONFIG_NF_NAT) 26 #include <net/netfilter/nf_nat.h> 27 #endif 28 29 #include <net/netfilter/nf_conntrack_act_ct.h> 30 31 #include "datapath.h" 32 #include "conntrack.h" 33 #include "flow.h" 34 #include "flow_netlink.h" 35 36 struct ovs_ct_len_tbl { 37 int maxlen; 38 int minlen; 39 }; 40 41 /* Metadata mark for masked write to conntrack mark */ 42 struct md_mark { 43 u32 value; 44 u32 mask; 45 }; 46 47 /* Metadata label for masked write to conntrack label. */ 48 struct md_labels { 49 struct ovs_key_ct_labels value; 50 struct ovs_key_ct_labels mask; 51 }; 52 53 enum ovs_ct_nat { 54 OVS_CT_NAT = 1 << 0, /* NAT for committed connections only. */ 55 OVS_CT_SRC_NAT = 1 << 1, /* Source NAT for NEW connections. */ 56 OVS_CT_DST_NAT = 1 << 2, /* Destination NAT for NEW connections. */ 57 }; 58 59 /* Conntrack action context for execution. */ 60 struct ovs_conntrack_info { 61 struct nf_conntrack_helper *helper; 62 struct nf_conntrack_zone zone; 63 struct nf_conn *ct; 64 u8 commit : 1; 65 u8 nat : 3; /* enum ovs_ct_nat */ 66 u8 force : 1; 67 u8 have_eventmask : 1; 68 u16 family; 69 u32 eventmask; /* Mask of 1 << IPCT_*. */ 70 struct md_mark mark; 71 struct md_labels labels; 72 char timeout[CTNL_TIMEOUT_NAME_MAX]; 73 struct nf_ct_timeout *nf_ct_timeout; 74 #if IS_ENABLED(CONFIG_NF_NAT) 75 struct nf_nat_range2 range; /* Only present for SRC NAT and DST NAT. */ 76 #endif 77 }; 78 79 #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) 80 #define OVS_CT_LIMIT_UNLIMITED 0 81 #define OVS_CT_LIMIT_DEFAULT OVS_CT_LIMIT_UNLIMITED 82 #define CT_LIMIT_HASH_BUCKETS 512 83 static DEFINE_STATIC_KEY_FALSE(ovs_ct_limit_enabled); 84 85 struct ovs_ct_limit { 86 /* Elements in ovs_ct_limit_info->limits hash table */ 87 struct hlist_node hlist_node; 88 struct rcu_head rcu; 89 u16 zone; 90 u32 limit; 91 }; 92 93 struct ovs_ct_limit_info { 94 u32 default_limit; 95 struct hlist_head *limits; 96 struct nf_conncount_data *data; 97 }; 98 99 static const struct nla_policy ct_limit_policy[OVS_CT_LIMIT_ATTR_MAX + 1] = { 100 [OVS_CT_LIMIT_ATTR_ZONE_LIMIT] = { .type = NLA_NESTED, }, 101 }; 102 #endif 103 104 static bool labels_nonzero(const struct ovs_key_ct_labels *labels); 105 106 static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info); 107 108 static u16 key_to_nfproto(const struct sw_flow_key *key) 109 { 110 switch (ntohs(key->eth.type)) { 111 case ETH_P_IP: 112 return NFPROTO_IPV4; 113 case ETH_P_IPV6: 114 return NFPROTO_IPV6; 115 default: 116 return NFPROTO_UNSPEC; 117 } 118 } 119 120 /* Map SKB connection state into the values used by flow definition. */ 121 static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo) 122 { 123 u8 ct_state = OVS_CS_F_TRACKED; 124 125 switch (ctinfo) { 126 case IP_CT_ESTABLISHED_REPLY: 127 case IP_CT_RELATED_REPLY: 128 ct_state |= OVS_CS_F_REPLY_DIR; 129 break; 130 default: 131 break; 132 } 133 134 switch (ctinfo) { 135 case IP_CT_ESTABLISHED: 136 case IP_CT_ESTABLISHED_REPLY: 137 ct_state |= OVS_CS_F_ESTABLISHED; 138 break; 139 case IP_CT_RELATED: 140 case IP_CT_RELATED_REPLY: 141 ct_state |= OVS_CS_F_RELATED; 142 break; 143 case IP_CT_NEW: 144 ct_state |= OVS_CS_F_NEW; 145 break; 146 default: 147 break; 148 } 149 150 return ct_state; 151 } 152 153 static u32 ovs_ct_get_mark(const struct nf_conn *ct) 154 { 155 #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) 156 return ct ? READ_ONCE(ct->mark) : 0; 157 #else 158 return 0; 159 #endif 160 } 161 162 /* Guard against conntrack labels max size shrinking below 128 bits. */ 163 #if NF_CT_LABELS_MAX_SIZE < 16 164 #error NF_CT_LABELS_MAX_SIZE must be at least 16 bytes 165 #endif 166 167 static void ovs_ct_get_labels(const struct nf_conn *ct, 168 struct ovs_key_ct_labels *labels) 169 { 170 struct nf_conn_labels *cl = ct ? nf_ct_labels_find(ct) : NULL; 171 172 if (cl) 173 memcpy(labels, cl->bits, OVS_CT_LABELS_LEN); 174 else 175 memset(labels, 0, OVS_CT_LABELS_LEN); 176 } 177 178 static void __ovs_ct_update_key_orig_tp(struct sw_flow_key *key, 179 const struct nf_conntrack_tuple *orig, 180 u8 icmp_proto) 181 { 182 key->ct_orig_proto = orig->dst.protonum; 183 if (orig->dst.protonum == icmp_proto) { 184 key->ct.orig_tp.src = htons(orig->dst.u.icmp.type); 185 key->ct.orig_tp.dst = htons(orig->dst.u.icmp.code); 186 } else { 187 key->ct.orig_tp.src = orig->src.u.all; 188 key->ct.orig_tp.dst = orig->dst.u.all; 189 } 190 } 191 192 static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state, 193 const struct nf_conntrack_zone *zone, 194 const struct nf_conn *ct) 195 { 196 key->ct_state = state; 197 key->ct_zone = zone->id; 198 key->ct.mark = ovs_ct_get_mark(ct); 199 ovs_ct_get_labels(ct, &key->ct.labels); 200 201 if (ct) { 202 const struct nf_conntrack_tuple *orig; 203 204 /* Use the master if we have one. */ 205 if (ct->master) 206 ct = ct->master; 207 orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; 208 209 /* IP version must match with the master connection. */ 210 if (key->eth.type == htons(ETH_P_IP) && 211 nf_ct_l3num(ct) == NFPROTO_IPV4) { 212 key->ipv4.ct_orig.src = orig->src.u3.ip; 213 key->ipv4.ct_orig.dst = orig->dst.u3.ip; 214 __ovs_ct_update_key_orig_tp(key, orig, IPPROTO_ICMP); 215 return; 216 } else if (key->eth.type == htons(ETH_P_IPV6) && 217 !sw_flow_key_is_nd(key) && 218 nf_ct_l3num(ct) == NFPROTO_IPV6) { 219 key->ipv6.ct_orig.src = orig->src.u3.in6; 220 key->ipv6.ct_orig.dst = orig->dst.u3.in6; 221 __ovs_ct_update_key_orig_tp(key, orig, NEXTHDR_ICMP); 222 return; 223 } 224 } 225 /* Clear 'ct_orig_proto' to mark the non-existence of conntrack 226 * original direction key fields. 227 */ 228 key->ct_orig_proto = 0; 229 } 230 231 /* Update 'key' based on skb->_nfct. If 'post_ct' is true, then OVS has 232 * previously sent the packet to conntrack via the ct action. If 233 * 'keep_nat_flags' is true, the existing NAT flags retained, else they are 234 * initialized from the connection status. 235 */ 236 static void ovs_ct_update_key(const struct sk_buff *skb, 237 const struct ovs_conntrack_info *info, 238 struct sw_flow_key *key, bool post_ct, 239 bool keep_nat_flags) 240 { 241 const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt; 242 enum ip_conntrack_info ctinfo; 243 struct nf_conn *ct; 244 u8 state = 0; 245 246 ct = nf_ct_get(skb, &ctinfo); 247 if (ct) { 248 state = ovs_ct_get_state(ctinfo); 249 /* All unconfirmed entries are NEW connections. */ 250 if (!nf_ct_is_confirmed(ct)) 251 state |= OVS_CS_F_NEW; 252 /* OVS persists the related flag for the duration of the 253 * connection. 254 */ 255 if (ct->master) 256 state |= OVS_CS_F_RELATED; 257 if (keep_nat_flags) { 258 state |= key->ct_state & OVS_CS_F_NAT_MASK; 259 } else { 260 if (ct->status & IPS_SRC_NAT) 261 state |= OVS_CS_F_SRC_NAT; 262 if (ct->status & IPS_DST_NAT) 263 state |= OVS_CS_F_DST_NAT; 264 } 265 zone = nf_ct_zone(ct); 266 } else if (post_ct) { 267 state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID; 268 if (info) 269 zone = &info->zone; 270 } 271 __ovs_ct_update_key(key, state, zone, ct); 272 } 273 274 /* This is called to initialize CT key fields possibly coming in from the local 275 * stack. 276 */ 277 void ovs_ct_fill_key(const struct sk_buff *skb, 278 struct sw_flow_key *key, 279 bool post_ct) 280 { 281 ovs_ct_update_key(skb, NULL, key, post_ct, false); 282 } 283 284 int ovs_ct_put_key(const struct sw_flow_key *swkey, 285 const struct sw_flow_key *output, struct sk_buff *skb) 286 { 287 if (nla_put_u32(skb, OVS_KEY_ATTR_CT_STATE, output->ct_state)) 288 return -EMSGSIZE; 289 290 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && 291 nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, output->ct_zone)) 292 return -EMSGSIZE; 293 294 if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && 295 nla_put_u32(skb, OVS_KEY_ATTR_CT_MARK, output->ct.mark)) 296 return -EMSGSIZE; 297 298 if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && 299 nla_put(skb, OVS_KEY_ATTR_CT_LABELS, sizeof(output->ct.labels), 300 &output->ct.labels)) 301 return -EMSGSIZE; 302 303 if (swkey->ct_orig_proto) { 304 if (swkey->eth.type == htons(ETH_P_IP)) { 305 struct ovs_key_ct_tuple_ipv4 orig; 306 307 memset(&orig, 0, sizeof(orig)); 308 orig.ipv4_src = output->ipv4.ct_orig.src; 309 orig.ipv4_dst = output->ipv4.ct_orig.dst; 310 orig.src_port = output->ct.orig_tp.src; 311 orig.dst_port = output->ct.orig_tp.dst; 312 orig.ipv4_proto = output->ct_orig_proto; 313 314 if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4, 315 sizeof(orig), &orig)) 316 return -EMSGSIZE; 317 } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 318 struct ovs_key_ct_tuple_ipv6 orig; 319 320 memset(&orig, 0, sizeof(orig)); 321 memcpy(orig.ipv6_src, output->ipv6.ct_orig.src.s6_addr32, 322 sizeof(orig.ipv6_src)); 323 memcpy(orig.ipv6_dst, output->ipv6.ct_orig.dst.s6_addr32, 324 sizeof(orig.ipv6_dst)); 325 orig.src_port = output->ct.orig_tp.src; 326 orig.dst_port = output->ct.orig_tp.dst; 327 orig.ipv6_proto = output->ct_orig_proto; 328 329 if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6, 330 sizeof(orig), &orig)) 331 return -EMSGSIZE; 332 } 333 } 334 335 return 0; 336 } 337 338 static int ovs_ct_set_mark(struct nf_conn *ct, struct sw_flow_key *key, 339 u32 ct_mark, u32 mask) 340 { 341 #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) 342 u32 new_mark; 343 344 new_mark = ct_mark | (READ_ONCE(ct->mark) & ~(mask)); 345 if (READ_ONCE(ct->mark) != new_mark) { 346 WRITE_ONCE(ct->mark, new_mark); 347 if (nf_ct_is_confirmed(ct)) 348 nf_conntrack_event_cache(IPCT_MARK, ct); 349 key->ct.mark = new_mark; 350 } 351 352 return 0; 353 #else 354 return -ENOTSUPP; 355 #endif 356 } 357 358 static struct nf_conn_labels *ovs_ct_get_conn_labels(struct nf_conn *ct) 359 { 360 struct nf_conn_labels *cl; 361 362 cl = nf_ct_labels_find(ct); 363 if (!cl) { 364 nf_ct_labels_ext_add(ct); 365 cl = nf_ct_labels_find(ct); 366 } 367 368 return cl; 369 } 370 371 /* Initialize labels for a new, yet to be committed conntrack entry. Note that 372 * since the new connection is not yet confirmed, and thus no-one else has 373 * access to it's labels, we simply write them over. 374 */ 375 static int ovs_ct_init_labels(struct nf_conn *ct, struct sw_flow_key *key, 376 const struct ovs_key_ct_labels *labels, 377 const struct ovs_key_ct_labels *mask) 378 { 379 struct nf_conn_labels *cl, *master_cl; 380 bool have_mask = labels_nonzero(mask); 381 382 /* Inherit master's labels to the related connection? */ 383 master_cl = ct->master ? nf_ct_labels_find(ct->master) : NULL; 384 385 if (!master_cl && !have_mask) 386 return 0; /* Nothing to do. */ 387 388 cl = ovs_ct_get_conn_labels(ct); 389 if (!cl) 390 return -ENOSPC; 391 392 /* Inherit the master's labels, if any. */ 393 if (master_cl) 394 *cl = *master_cl; 395 396 if (have_mask) { 397 u32 *dst = (u32 *)cl->bits; 398 int i; 399 400 for (i = 0; i < OVS_CT_LABELS_LEN_32; i++) 401 dst[i] = (dst[i] & ~mask->ct_labels_32[i]) | 402 (labels->ct_labels_32[i] 403 & mask->ct_labels_32[i]); 404 } 405 406 /* Labels are included in the IPCTNL_MSG_CT_NEW event only if the 407 * IPCT_LABEL bit is set in the event cache. 408 */ 409 nf_conntrack_event_cache(IPCT_LABEL, ct); 410 411 memcpy(&key->ct.labels, cl->bits, OVS_CT_LABELS_LEN); 412 413 return 0; 414 } 415 416 static int ovs_ct_set_labels(struct nf_conn *ct, struct sw_flow_key *key, 417 const struct ovs_key_ct_labels *labels, 418 const struct ovs_key_ct_labels *mask) 419 { 420 struct nf_conn_labels *cl; 421 int err; 422 423 cl = ovs_ct_get_conn_labels(ct); 424 if (!cl) 425 return -ENOSPC; 426 427 err = nf_connlabels_replace(ct, labels->ct_labels_32, 428 mask->ct_labels_32, 429 OVS_CT_LABELS_LEN_32); 430 if (err) 431 return err; 432 433 memcpy(&key->ct.labels, cl->bits, OVS_CT_LABELS_LEN); 434 435 return 0; 436 } 437 438 static int ovs_ct_handle_fragments(struct net *net, struct sw_flow_key *key, 439 u16 zone, int family, struct sk_buff *skb) 440 { 441 struct ovs_skb_cb ovs_cb = *OVS_CB(skb); 442 int err; 443 444 err = nf_ct_handle_fragments(net, skb, zone, family, &key->ip.proto, &ovs_cb.mru); 445 if (err) 446 return err; 447 448 /* The key extracted from the fragment that completed this datagram 449 * likely didn't have an L4 header, so regenerate it. 450 */ 451 ovs_flow_key_update_l3l4(skb, key); 452 key->ip.frag = OVS_FRAG_TYPE_NONE; 453 *OVS_CB(skb) = ovs_cb; 454 455 return 0; 456 } 457 458 /* This replicates logic from nf_conntrack_core.c that is not exported. */ 459 static enum ip_conntrack_info 460 ovs_ct_get_info(const struct nf_conntrack_tuple_hash *h) 461 { 462 const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); 463 464 if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) 465 return IP_CT_ESTABLISHED_REPLY; 466 /* Once we've had two way comms, always ESTABLISHED. */ 467 if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) 468 return IP_CT_ESTABLISHED; 469 if (test_bit(IPS_EXPECTED_BIT, &ct->status)) 470 return IP_CT_RELATED; 471 return IP_CT_NEW; 472 } 473 474 /* Find an existing connection which this packet belongs to without 475 * re-attributing statistics or modifying the connection state. This allows an 476 * skb->_nfct lost due to an upcall to be recovered during actions execution. 477 * 478 * Must be called with rcu_read_lock. 479 * 480 * On success, populates skb->_nfct and returns the connection. Returns NULL 481 * if there is no existing entry. 482 */ 483 static struct nf_conn * 484 ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone, 485 u8 l3num, struct sk_buff *skb, bool natted) 486 { 487 struct nf_conntrack_tuple tuple; 488 struct nf_conntrack_tuple_hash *h; 489 struct nf_conn *ct; 490 491 if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), l3num, 492 net, &tuple)) { 493 pr_debug("ovs_ct_find_existing: Can't get tuple\n"); 494 return NULL; 495 } 496 497 /* Must invert the tuple if skb has been transformed by NAT. */ 498 if (natted) { 499 struct nf_conntrack_tuple inverse; 500 501 if (!nf_ct_invert_tuple(&inverse, &tuple)) { 502 pr_debug("ovs_ct_find_existing: Inversion failed!\n"); 503 return NULL; 504 } 505 tuple = inverse; 506 } 507 508 /* look for tuple match */ 509 h = nf_conntrack_find_get(net, zone, &tuple); 510 if (!h) 511 return NULL; /* Not found. */ 512 513 ct = nf_ct_tuplehash_to_ctrack(h); 514 515 /* Inverted packet tuple matches the reverse direction conntrack tuple, 516 * select the other tuplehash to get the right 'ctinfo' bits for this 517 * packet. 518 */ 519 if (natted) 520 h = &ct->tuplehash[!h->tuple.dst.dir]; 521 522 nf_ct_set(skb, ct, ovs_ct_get_info(h)); 523 return ct; 524 } 525 526 static 527 struct nf_conn *ovs_ct_executed(struct net *net, 528 const struct sw_flow_key *key, 529 const struct ovs_conntrack_info *info, 530 struct sk_buff *skb, 531 bool *ct_executed) 532 { 533 struct nf_conn *ct = NULL; 534 535 /* If no ct, check if we have evidence that an existing conntrack entry 536 * might be found for this skb. This happens when we lose a skb->_nfct 537 * due to an upcall, or if the direction is being forced. If the 538 * connection was not confirmed, it is not cached and needs to be run 539 * through conntrack again. 540 */ 541 *ct_executed = (key->ct_state & OVS_CS_F_TRACKED) && 542 !(key->ct_state & OVS_CS_F_INVALID) && 543 (key->ct_zone == info->zone.id); 544 545 if (*ct_executed || (!key->ct_state && info->force)) { 546 ct = ovs_ct_find_existing(net, &info->zone, info->family, skb, 547 !!(key->ct_state & 548 OVS_CS_F_NAT_MASK)); 549 } 550 551 return ct; 552 } 553 554 /* Determine whether skb->_nfct is equal to the result of conntrack lookup. */ 555 static bool skb_nfct_cached(struct net *net, 556 const struct sw_flow_key *key, 557 const struct ovs_conntrack_info *info, 558 struct sk_buff *skb) 559 { 560 enum ip_conntrack_info ctinfo; 561 struct nf_conn *ct; 562 bool ct_executed = true; 563 564 ct = nf_ct_get(skb, &ctinfo); 565 if (!ct) 566 ct = ovs_ct_executed(net, key, info, skb, &ct_executed); 567 568 if (ct) 569 nf_ct_get(skb, &ctinfo); 570 else 571 return false; 572 573 if (!net_eq(net, read_pnet(&ct->ct_net))) 574 return false; 575 if (!nf_ct_zone_equal_any(info->ct, nf_ct_zone(ct))) 576 return false; 577 if (info->helper) { 578 struct nf_conn_help *help; 579 580 help = nf_ct_ext_find(ct, NF_CT_EXT_HELPER); 581 if (help && rcu_access_pointer(help->helper) != info->helper) 582 return false; 583 } 584 if (info->nf_ct_timeout) { 585 struct nf_conn_timeout *timeout_ext; 586 587 timeout_ext = nf_ct_timeout_find(ct); 588 if (!timeout_ext || info->nf_ct_timeout != 589 rcu_dereference(timeout_ext->timeout)) 590 return false; 591 } 592 /* Force conntrack entry direction to the current packet? */ 593 if (info->force && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) { 594 /* Delete the conntrack entry if confirmed, else just release 595 * the reference. 596 */ 597 if (nf_ct_is_confirmed(ct)) 598 nf_ct_delete(ct, 0, 0); 599 600 nf_ct_put(ct); 601 nf_ct_set(skb, NULL, 0); 602 return false; 603 } 604 605 return ct_executed; 606 } 607 608 #if IS_ENABLED(CONFIG_NF_NAT) 609 static void ovs_nat_update_key(struct sw_flow_key *key, 610 const struct sk_buff *skb, 611 enum nf_nat_manip_type maniptype) 612 { 613 if (maniptype == NF_NAT_MANIP_SRC) { 614 __be16 src; 615 616 key->ct_state |= OVS_CS_F_SRC_NAT; 617 if (key->eth.type == htons(ETH_P_IP)) 618 key->ipv4.addr.src = ip_hdr(skb)->saddr; 619 else if (key->eth.type == htons(ETH_P_IPV6)) 620 memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr, 621 sizeof(key->ipv6.addr.src)); 622 else 623 return; 624 625 if (key->ip.proto == IPPROTO_UDP) 626 src = udp_hdr(skb)->source; 627 else if (key->ip.proto == IPPROTO_TCP) 628 src = tcp_hdr(skb)->source; 629 else if (key->ip.proto == IPPROTO_SCTP) 630 src = sctp_hdr(skb)->source; 631 else 632 return; 633 634 key->tp.src = src; 635 } else { 636 __be16 dst; 637 638 key->ct_state |= OVS_CS_F_DST_NAT; 639 if (key->eth.type == htons(ETH_P_IP)) 640 key->ipv4.addr.dst = ip_hdr(skb)->daddr; 641 else if (key->eth.type == htons(ETH_P_IPV6)) 642 memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr, 643 sizeof(key->ipv6.addr.dst)); 644 else 645 return; 646 647 if (key->ip.proto == IPPROTO_UDP) 648 dst = udp_hdr(skb)->dest; 649 else if (key->ip.proto == IPPROTO_TCP) 650 dst = tcp_hdr(skb)->dest; 651 else if (key->ip.proto == IPPROTO_SCTP) 652 dst = sctp_hdr(skb)->dest; 653 else 654 return; 655 656 key->tp.dst = dst; 657 } 658 } 659 660 /* Returns NF_DROP if the packet should be dropped, NF_ACCEPT otherwise. */ 661 static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, 662 const struct ovs_conntrack_info *info, 663 struct sk_buff *skb, struct nf_conn *ct, 664 enum ip_conntrack_info ctinfo) 665 { 666 int err, action = 0; 667 668 if (!(info->nat & OVS_CT_NAT)) 669 return NF_ACCEPT; 670 if (info->nat & OVS_CT_SRC_NAT) 671 action |= BIT(NF_NAT_MANIP_SRC); 672 if (info->nat & OVS_CT_DST_NAT) 673 action |= BIT(NF_NAT_MANIP_DST); 674 675 err = nf_ct_nat(skb, ct, ctinfo, &action, &info->range, info->commit); 676 677 if (action & BIT(NF_NAT_MANIP_SRC)) 678 ovs_nat_update_key(key, skb, NF_NAT_MANIP_SRC); 679 if (action & BIT(NF_NAT_MANIP_DST)) 680 ovs_nat_update_key(key, skb, NF_NAT_MANIP_DST); 681 682 return err; 683 } 684 #else /* !CONFIG_NF_NAT */ 685 static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, 686 const struct ovs_conntrack_info *info, 687 struct sk_buff *skb, struct nf_conn *ct, 688 enum ip_conntrack_info ctinfo) 689 { 690 return NF_ACCEPT; 691 } 692 #endif 693 694 /* Pass 'skb' through conntrack in 'net', using zone configured in 'info', if 695 * not done already. Update key with new CT state after passing the packet 696 * through conntrack. 697 * Note that if the packet is deemed invalid by conntrack, skb->_nfct will be 698 * set to NULL and 0 will be returned. 699 */ 700 static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, 701 const struct ovs_conntrack_info *info, 702 struct sk_buff *skb) 703 { 704 /* If we are recirculating packets to match on conntrack fields and 705 * committing with a separate conntrack action, then we don't need to 706 * actually run the packet through conntrack twice unless it's for a 707 * different zone. 708 */ 709 bool cached = skb_nfct_cached(net, key, info, skb); 710 enum ip_conntrack_info ctinfo; 711 struct nf_conn *ct; 712 713 if (!cached) { 714 struct nf_hook_state state = { 715 .hook = NF_INET_PRE_ROUTING, 716 .pf = info->family, 717 .net = net, 718 }; 719 struct nf_conn *tmpl = info->ct; 720 int err; 721 722 /* Associate skb with specified zone. */ 723 if (tmpl) { 724 ct = nf_ct_get(skb, &ctinfo); 725 nf_ct_put(ct); 726 nf_conntrack_get(&tmpl->ct_general); 727 nf_ct_set(skb, tmpl, IP_CT_NEW); 728 } 729 730 err = nf_conntrack_in(skb, &state); 731 if (err != NF_ACCEPT) 732 return -ENOENT; 733 734 /* Clear CT state NAT flags to mark that we have not yet done 735 * NAT after the nf_conntrack_in() call. We can actually clear 736 * the whole state, as it will be re-initialized below. 737 */ 738 key->ct_state = 0; 739 740 /* Update the key, but keep the NAT flags. */ 741 ovs_ct_update_key(skb, info, key, true, true); 742 } 743 744 ct = nf_ct_get(skb, &ctinfo); 745 if (ct) { 746 bool add_helper = false; 747 748 /* Packets starting a new connection must be NATted before the 749 * helper, so that the helper knows about the NAT. We enforce 750 * this by delaying both NAT and helper calls for unconfirmed 751 * connections until the committing CT action. For later 752 * packets NAT and Helper may be called in either order. 753 * 754 * NAT will be done only if the CT action has NAT, and only 755 * once per packet (per zone), as guarded by the NAT bits in 756 * the key->ct_state. 757 */ 758 if (info->nat && !(key->ct_state & OVS_CS_F_NAT_MASK) && 759 (nf_ct_is_confirmed(ct) || info->commit) && 760 ovs_ct_nat(net, key, info, skb, ct, ctinfo) != NF_ACCEPT) { 761 return -EINVAL; 762 } 763 764 /* Userspace may decide to perform a ct lookup without a helper 765 * specified followed by a (recirculate and) commit with one, 766 * or attach a helper in a later commit. Therefore, for 767 * connections which we will commit, we may need to attach 768 * the helper here. 769 */ 770 if (!nf_ct_is_confirmed(ct) && info->commit && 771 info->helper && !nfct_help(ct)) { 772 int err = __nf_ct_try_assign_helper(ct, info->ct, 773 GFP_ATOMIC); 774 if (err) 775 return err; 776 add_helper = true; 777 778 /* helper installed, add seqadj if NAT is required */ 779 if (info->nat && !nfct_seqadj(ct)) { 780 if (!nfct_seqadj_ext_add(ct)) 781 return -EINVAL; 782 } 783 } 784 785 /* Call the helper only if: 786 * - nf_conntrack_in() was executed above ("!cached") or a 787 * helper was just attached ("add_helper") for a confirmed 788 * connection, or 789 * - When committing an unconfirmed connection. 790 */ 791 if ((nf_ct_is_confirmed(ct) ? !cached || add_helper : 792 info->commit) && 793 nf_ct_helper(skb, ct, ctinfo, info->family) != NF_ACCEPT) { 794 return -EINVAL; 795 } 796 797 if (nf_ct_protonum(ct) == IPPROTO_TCP && 798 nf_ct_is_confirmed(ct) && nf_conntrack_tcp_established(ct)) { 799 /* Be liberal for tcp packets so that out-of-window 800 * packets are not marked invalid. 801 */ 802 nf_ct_set_tcp_be_liberal(ct); 803 } 804 805 nf_conn_act_ct_ext_fill(skb, ct, ctinfo); 806 } 807 808 return 0; 809 } 810 811 /* Lookup connection and read fields into key. */ 812 static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key, 813 const struct ovs_conntrack_info *info, 814 struct sk_buff *skb) 815 { 816 struct nf_conn *ct; 817 int err; 818 819 err = __ovs_ct_lookup(net, key, info, skb); 820 if (err) 821 return err; 822 823 ct = (struct nf_conn *)skb_nfct(skb); 824 if (ct) 825 nf_ct_deliver_cached_events(ct); 826 827 return 0; 828 } 829 830 static bool labels_nonzero(const struct ovs_key_ct_labels *labels) 831 { 832 size_t i; 833 834 for (i = 0; i < OVS_CT_LABELS_LEN_32; i++) 835 if (labels->ct_labels_32[i]) 836 return true; 837 838 return false; 839 } 840 841 #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) 842 static struct hlist_head *ct_limit_hash_bucket( 843 const struct ovs_ct_limit_info *info, u16 zone) 844 { 845 return &info->limits[zone & (CT_LIMIT_HASH_BUCKETS - 1)]; 846 } 847 848 /* Call with ovs_mutex */ 849 static void ct_limit_set(const struct ovs_ct_limit_info *info, 850 struct ovs_ct_limit *new_ct_limit) 851 { 852 struct ovs_ct_limit *ct_limit; 853 struct hlist_head *head; 854 855 head = ct_limit_hash_bucket(info, new_ct_limit->zone); 856 hlist_for_each_entry_rcu(ct_limit, head, hlist_node) { 857 if (ct_limit->zone == new_ct_limit->zone) { 858 hlist_replace_rcu(&ct_limit->hlist_node, 859 &new_ct_limit->hlist_node); 860 kfree_rcu(ct_limit, rcu); 861 return; 862 } 863 } 864 865 hlist_add_head_rcu(&new_ct_limit->hlist_node, head); 866 } 867 868 /* Call with ovs_mutex */ 869 static void ct_limit_del(const struct ovs_ct_limit_info *info, u16 zone) 870 { 871 struct ovs_ct_limit *ct_limit; 872 struct hlist_head *head; 873 struct hlist_node *n; 874 875 head = ct_limit_hash_bucket(info, zone); 876 hlist_for_each_entry_safe(ct_limit, n, head, hlist_node) { 877 if (ct_limit->zone == zone) { 878 hlist_del_rcu(&ct_limit->hlist_node); 879 kfree_rcu(ct_limit, rcu); 880 return; 881 } 882 } 883 } 884 885 /* Call with RCU read lock */ 886 static u32 ct_limit_get(const struct ovs_ct_limit_info *info, u16 zone) 887 { 888 struct ovs_ct_limit *ct_limit; 889 struct hlist_head *head; 890 891 head = ct_limit_hash_bucket(info, zone); 892 hlist_for_each_entry_rcu(ct_limit, head, hlist_node) { 893 if (ct_limit->zone == zone) 894 return ct_limit->limit; 895 } 896 897 return info->default_limit; 898 } 899 900 static int ovs_ct_check_limit(struct net *net, 901 const struct ovs_conntrack_info *info, 902 const struct nf_conntrack_tuple *tuple) 903 { 904 struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 905 const struct ovs_ct_limit_info *ct_limit_info = ovs_net->ct_limit_info; 906 u32 per_zone_limit, connections; 907 u32 conncount_key; 908 909 conncount_key = info->zone.id; 910 911 per_zone_limit = ct_limit_get(ct_limit_info, info->zone.id); 912 if (per_zone_limit == OVS_CT_LIMIT_UNLIMITED) 913 return 0; 914 915 connections = nf_conncount_count(net, ct_limit_info->data, 916 &conncount_key, tuple, &info->zone); 917 if (connections > per_zone_limit) 918 return -ENOMEM; 919 920 return 0; 921 } 922 #endif 923 924 /* Lookup connection and confirm if unconfirmed. */ 925 static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, 926 const struct ovs_conntrack_info *info, 927 struct sk_buff *skb) 928 { 929 enum ip_conntrack_info ctinfo; 930 struct nf_conn *ct; 931 int err; 932 933 err = __ovs_ct_lookup(net, key, info, skb); 934 if (err) 935 return err; 936 937 /* The connection could be invalid, in which case this is a no-op.*/ 938 ct = nf_ct_get(skb, &ctinfo); 939 if (!ct) 940 return 0; 941 942 #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) 943 if (static_branch_unlikely(&ovs_ct_limit_enabled)) { 944 if (!nf_ct_is_confirmed(ct)) { 945 err = ovs_ct_check_limit(net, info, 946 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); 947 if (err) { 948 net_warn_ratelimited("openvswitch: zone: %u " 949 "exceeds conntrack limit\n", 950 info->zone.id); 951 return err; 952 } 953 } 954 } 955 #endif 956 957 /* Set the conntrack event mask if given. NEW and DELETE events have 958 * their own groups, but the NFNLGRP_CONNTRACK_UPDATE group listener 959 * typically would receive many kinds of updates. Setting the event 960 * mask allows those events to be filtered. The set event mask will 961 * remain in effect for the lifetime of the connection unless changed 962 * by a further CT action with both the commit flag and the eventmask 963 * option. */ 964 if (info->have_eventmask) { 965 struct nf_conntrack_ecache *cache = nf_ct_ecache_find(ct); 966 967 if (cache) 968 cache->ctmask = info->eventmask; 969 } 970 971 /* Apply changes before confirming the connection so that the initial 972 * conntrack NEW netlink event carries the values given in the CT 973 * action. 974 */ 975 if (info->mark.mask) { 976 err = ovs_ct_set_mark(ct, key, info->mark.value, 977 info->mark.mask); 978 if (err) 979 return err; 980 } 981 if (!nf_ct_is_confirmed(ct)) { 982 err = ovs_ct_init_labels(ct, key, &info->labels.value, 983 &info->labels.mask); 984 if (err) 985 return err; 986 987 nf_conn_act_ct_ext_add(ct); 988 } else if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && 989 labels_nonzero(&info->labels.mask)) { 990 err = ovs_ct_set_labels(ct, key, &info->labels.value, 991 &info->labels.mask); 992 if (err) 993 return err; 994 } 995 /* This will take care of sending queued events even if the connection 996 * is already confirmed. 997 */ 998 if (nf_conntrack_confirm(skb) != NF_ACCEPT) 999 return -EINVAL; 1000 1001 return 0; 1002 } 1003 1004 /* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero 1005 * value if 'skb' is freed. 1006 */ 1007 int ovs_ct_execute(struct net *net, struct sk_buff *skb, 1008 struct sw_flow_key *key, 1009 const struct ovs_conntrack_info *info) 1010 { 1011 int nh_ofs; 1012 int err; 1013 1014 /* The conntrack module expects to be working at L3. */ 1015 nh_ofs = skb_network_offset(skb); 1016 skb_pull_rcsum(skb, nh_ofs); 1017 1018 err = nf_ct_skb_network_trim(skb, info->family); 1019 if (err) { 1020 kfree_skb(skb); 1021 return err; 1022 } 1023 1024 if (key->ip.frag != OVS_FRAG_TYPE_NONE) { 1025 err = ovs_ct_handle_fragments(net, key, info->zone.id, 1026 info->family, skb); 1027 if (err) 1028 return err; 1029 } 1030 1031 if (info->commit) 1032 err = ovs_ct_commit(net, key, info, skb); 1033 else 1034 err = ovs_ct_lookup(net, key, info, skb); 1035 1036 skb_push_rcsum(skb, nh_ofs); 1037 if (err) 1038 kfree_skb(skb); 1039 return err; 1040 } 1041 1042 int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key) 1043 { 1044 enum ip_conntrack_info ctinfo; 1045 struct nf_conn *ct; 1046 1047 ct = nf_ct_get(skb, &ctinfo); 1048 1049 nf_ct_put(ct); 1050 nf_ct_set(skb, NULL, IP_CT_UNTRACKED); 1051 1052 if (key) 1053 ovs_ct_fill_key(skb, key, false); 1054 1055 return 0; 1056 } 1057 1058 #if IS_ENABLED(CONFIG_NF_NAT) 1059 static int parse_nat(const struct nlattr *attr, 1060 struct ovs_conntrack_info *info, bool log) 1061 { 1062 struct nlattr *a; 1063 int rem; 1064 bool have_ip_max = false; 1065 bool have_proto_max = false; 1066 bool ip_vers = (info->family == NFPROTO_IPV6); 1067 1068 nla_for_each_nested(a, attr, rem) { 1069 static const int ovs_nat_attr_lens[OVS_NAT_ATTR_MAX + 1][2] = { 1070 [OVS_NAT_ATTR_SRC] = {0, 0}, 1071 [OVS_NAT_ATTR_DST] = {0, 0}, 1072 [OVS_NAT_ATTR_IP_MIN] = {sizeof(struct in_addr), 1073 sizeof(struct in6_addr)}, 1074 [OVS_NAT_ATTR_IP_MAX] = {sizeof(struct in_addr), 1075 sizeof(struct in6_addr)}, 1076 [OVS_NAT_ATTR_PROTO_MIN] = {sizeof(u16), sizeof(u16)}, 1077 [OVS_NAT_ATTR_PROTO_MAX] = {sizeof(u16), sizeof(u16)}, 1078 [OVS_NAT_ATTR_PERSISTENT] = {0, 0}, 1079 [OVS_NAT_ATTR_PROTO_HASH] = {0, 0}, 1080 [OVS_NAT_ATTR_PROTO_RANDOM] = {0, 0}, 1081 }; 1082 int type = nla_type(a); 1083 1084 if (type > OVS_NAT_ATTR_MAX) { 1085 OVS_NLERR(log, "Unknown NAT attribute (type=%d, max=%d)", 1086 type, OVS_NAT_ATTR_MAX); 1087 return -EINVAL; 1088 } 1089 1090 if (nla_len(a) != ovs_nat_attr_lens[type][ip_vers]) { 1091 OVS_NLERR(log, "NAT attribute type %d has unexpected length (%d != %d)", 1092 type, nla_len(a), 1093 ovs_nat_attr_lens[type][ip_vers]); 1094 return -EINVAL; 1095 } 1096 1097 switch (type) { 1098 case OVS_NAT_ATTR_SRC: 1099 case OVS_NAT_ATTR_DST: 1100 if (info->nat) { 1101 OVS_NLERR(log, "Only one type of NAT may be specified"); 1102 return -ERANGE; 1103 } 1104 info->nat |= OVS_CT_NAT; 1105 info->nat |= ((type == OVS_NAT_ATTR_SRC) 1106 ? OVS_CT_SRC_NAT : OVS_CT_DST_NAT); 1107 break; 1108 1109 case OVS_NAT_ATTR_IP_MIN: 1110 nla_memcpy(&info->range.min_addr, a, 1111 sizeof(info->range.min_addr)); 1112 info->range.flags |= NF_NAT_RANGE_MAP_IPS; 1113 break; 1114 1115 case OVS_NAT_ATTR_IP_MAX: 1116 have_ip_max = true; 1117 nla_memcpy(&info->range.max_addr, a, 1118 sizeof(info->range.max_addr)); 1119 info->range.flags |= NF_NAT_RANGE_MAP_IPS; 1120 break; 1121 1122 case OVS_NAT_ATTR_PROTO_MIN: 1123 info->range.min_proto.all = htons(nla_get_u16(a)); 1124 info->range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; 1125 break; 1126 1127 case OVS_NAT_ATTR_PROTO_MAX: 1128 have_proto_max = true; 1129 info->range.max_proto.all = htons(nla_get_u16(a)); 1130 info->range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; 1131 break; 1132 1133 case OVS_NAT_ATTR_PERSISTENT: 1134 info->range.flags |= NF_NAT_RANGE_PERSISTENT; 1135 break; 1136 1137 case OVS_NAT_ATTR_PROTO_HASH: 1138 info->range.flags |= NF_NAT_RANGE_PROTO_RANDOM; 1139 break; 1140 1141 case OVS_NAT_ATTR_PROTO_RANDOM: 1142 info->range.flags |= NF_NAT_RANGE_PROTO_RANDOM_FULLY; 1143 break; 1144 1145 default: 1146 OVS_NLERR(log, "Unknown nat attribute (%d)", type); 1147 return -EINVAL; 1148 } 1149 } 1150 1151 if (rem > 0) { 1152 OVS_NLERR(log, "NAT attribute has %d unknown bytes", rem); 1153 return -EINVAL; 1154 } 1155 if (!info->nat) { 1156 /* Do not allow flags if no type is given. */ 1157 if (info->range.flags) { 1158 OVS_NLERR(log, 1159 "NAT flags may be given only when NAT range (SRC or DST) is also specified." 1160 ); 1161 return -EINVAL; 1162 } 1163 info->nat = OVS_CT_NAT; /* NAT existing connections. */ 1164 } else if (!info->commit) { 1165 OVS_NLERR(log, 1166 "NAT attributes may be specified only when CT COMMIT flag is also specified." 1167 ); 1168 return -EINVAL; 1169 } 1170 /* Allow missing IP_MAX. */ 1171 if (info->range.flags & NF_NAT_RANGE_MAP_IPS && !have_ip_max) { 1172 memcpy(&info->range.max_addr, &info->range.min_addr, 1173 sizeof(info->range.max_addr)); 1174 } 1175 /* Allow missing PROTO_MAX. */ 1176 if (info->range.flags & NF_NAT_RANGE_PROTO_SPECIFIED && 1177 !have_proto_max) { 1178 info->range.max_proto.all = info->range.min_proto.all; 1179 } 1180 return 0; 1181 } 1182 #endif 1183 1184 static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = { 1185 [OVS_CT_ATTR_COMMIT] = { .minlen = 0, .maxlen = 0 }, 1186 [OVS_CT_ATTR_FORCE_COMMIT] = { .minlen = 0, .maxlen = 0 }, 1187 [OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16), 1188 .maxlen = sizeof(u16) }, 1189 [OVS_CT_ATTR_MARK] = { .minlen = sizeof(struct md_mark), 1190 .maxlen = sizeof(struct md_mark) }, 1191 [OVS_CT_ATTR_LABELS] = { .minlen = sizeof(struct md_labels), 1192 .maxlen = sizeof(struct md_labels) }, 1193 [OVS_CT_ATTR_HELPER] = { .minlen = 1, 1194 .maxlen = NF_CT_HELPER_NAME_LEN }, 1195 #if IS_ENABLED(CONFIG_NF_NAT) 1196 /* NAT length is checked when parsing the nested attributes. */ 1197 [OVS_CT_ATTR_NAT] = { .minlen = 0, .maxlen = INT_MAX }, 1198 #endif 1199 [OVS_CT_ATTR_EVENTMASK] = { .minlen = sizeof(u32), 1200 .maxlen = sizeof(u32) }, 1201 [OVS_CT_ATTR_TIMEOUT] = { .minlen = 1, 1202 .maxlen = CTNL_TIMEOUT_NAME_MAX }, 1203 }; 1204 1205 static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, 1206 const char **helper, bool log) 1207 { 1208 struct nlattr *a; 1209 int rem; 1210 1211 nla_for_each_nested(a, attr, rem) { 1212 int type = nla_type(a); 1213 int maxlen; 1214 int minlen; 1215 1216 if (type > OVS_CT_ATTR_MAX) { 1217 OVS_NLERR(log, 1218 "Unknown conntrack attr (type=%d, max=%d)", 1219 type, OVS_CT_ATTR_MAX); 1220 return -EINVAL; 1221 } 1222 1223 maxlen = ovs_ct_attr_lens[type].maxlen; 1224 minlen = ovs_ct_attr_lens[type].minlen; 1225 if (nla_len(a) < minlen || nla_len(a) > maxlen) { 1226 OVS_NLERR(log, 1227 "Conntrack attr type has unexpected length (type=%d, length=%d, expected=%d)", 1228 type, nla_len(a), maxlen); 1229 return -EINVAL; 1230 } 1231 1232 switch (type) { 1233 case OVS_CT_ATTR_FORCE_COMMIT: 1234 info->force = true; 1235 fallthrough; 1236 case OVS_CT_ATTR_COMMIT: 1237 info->commit = true; 1238 break; 1239 #ifdef CONFIG_NF_CONNTRACK_ZONES 1240 case OVS_CT_ATTR_ZONE: 1241 info->zone.id = nla_get_u16(a); 1242 break; 1243 #endif 1244 #ifdef CONFIG_NF_CONNTRACK_MARK 1245 case OVS_CT_ATTR_MARK: { 1246 struct md_mark *mark = nla_data(a); 1247 1248 if (!mark->mask) { 1249 OVS_NLERR(log, "ct_mark mask cannot be 0"); 1250 return -EINVAL; 1251 } 1252 info->mark = *mark; 1253 break; 1254 } 1255 #endif 1256 #ifdef CONFIG_NF_CONNTRACK_LABELS 1257 case OVS_CT_ATTR_LABELS: { 1258 struct md_labels *labels = nla_data(a); 1259 1260 if (!labels_nonzero(&labels->mask)) { 1261 OVS_NLERR(log, "ct_labels mask cannot be 0"); 1262 return -EINVAL; 1263 } 1264 info->labels = *labels; 1265 break; 1266 } 1267 #endif 1268 case OVS_CT_ATTR_HELPER: 1269 *helper = nla_data(a); 1270 if (!string_is_terminated(*helper, nla_len(a))) { 1271 OVS_NLERR(log, "Invalid conntrack helper"); 1272 return -EINVAL; 1273 } 1274 break; 1275 #if IS_ENABLED(CONFIG_NF_NAT) 1276 case OVS_CT_ATTR_NAT: { 1277 int err = parse_nat(a, info, log); 1278 1279 if (err) 1280 return err; 1281 break; 1282 } 1283 #endif 1284 case OVS_CT_ATTR_EVENTMASK: 1285 info->have_eventmask = true; 1286 info->eventmask = nla_get_u32(a); 1287 break; 1288 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT 1289 case OVS_CT_ATTR_TIMEOUT: 1290 memcpy(info->timeout, nla_data(a), nla_len(a)); 1291 if (!string_is_terminated(info->timeout, nla_len(a))) { 1292 OVS_NLERR(log, "Invalid conntrack timeout"); 1293 return -EINVAL; 1294 } 1295 break; 1296 #endif 1297 1298 default: 1299 OVS_NLERR(log, "Unknown conntrack attr (%d)", 1300 type); 1301 return -EINVAL; 1302 } 1303 } 1304 1305 #ifdef CONFIG_NF_CONNTRACK_MARK 1306 if (!info->commit && info->mark.mask) { 1307 OVS_NLERR(log, 1308 "Setting conntrack mark requires 'commit' flag."); 1309 return -EINVAL; 1310 } 1311 #endif 1312 #ifdef CONFIG_NF_CONNTRACK_LABELS 1313 if (!info->commit && labels_nonzero(&info->labels.mask)) { 1314 OVS_NLERR(log, 1315 "Setting conntrack labels requires 'commit' flag."); 1316 return -EINVAL; 1317 } 1318 #endif 1319 if (rem > 0) { 1320 OVS_NLERR(log, "Conntrack attr has %d unknown bytes", rem); 1321 return -EINVAL; 1322 } 1323 1324 return 0; 1325 } 1326 1327 bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr) 1328 { 1329 if (attr == OVS_KEY_ATTR_CT_STATE) 1330 return true; 1331 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && 1332 attr == OVS_KEY_ATTR_CT_ZONE) 1333 return true; 1334 if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && 1335 attr == OVS_KEY_ATTR_CT_MARK) 1336 return true; 1337 if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && 1338 attr == OVS_KEY_ATTR_CT_LABELS) { 1339 struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 1340 1341 return ovs_net->xt_label; 1342 } 1343 1344 return false; 1345 } 1346 1347 int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, 1348 const struct sw_flow_key *key, 1349 struct sw_flow_actions **sfa, bool log) 1350 { 1351 struct ovs_conntrack_info ct_info; 1352 const char *helper = NULL; 1353 u16 family; 1354 int err; 1355 1356 family = key_to_nfproto(key); 1357 if (family == NFPROTO_UNSPEC) { 1358 OVS_NLERR(log, "ct family unspecified"); 1359 return -EINVAL; 1360 } 1361 1362 memset(&ct_info, 0, sizeof(ct_info)); 1363 ct_info.family = family; 1364 1365 nf_ct_zone_init(&ct_info.zone, NF_CT_DEFAULT_ZONE_ID, 1366 NF_CT_DEFAULT_ZONE_DIR, 0); 1367 1368 err = parse_ct(attr, &ct_info, &helper, log); 1369 if (err) 1370 return err; 1371 1372 /* Set up template for tracking connections in specific zones. */ 1373 ct_info.ct = nf_ct_tmpl_alloc(net, &ct_info.zone, GFP_KERNEL); 1374 if (!ct_info.ct) { 1375 OVS_NLERR(log, "Failed to allocate conntrack template"); 1376 return -ENOMEM; 1377 } 1378 1379 if (ct_info.timeout[0]) { 1380 if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto, 1381 ct_info.timeout)) 1382 pr_info_ratelimited("Failed to associated timeout " 1383 "policy `%s'\n", ct_info.timeout); 1384 else 1385 ct_info.nf_ct_timeout = rcu_dereference( 1386 nf_ct_timeout_find(ct_info.ct)->timeout); 1387 1388 } 1389 1390 if (helper) { 1391 err = nf_ct_add_helper(ct_info.ct, helper, ct_info.family, 1392 key->ip.proto, ct_info.nat, &ct_info.helper); 1393 if (err) { 1394 OVS_NLERR(log, "Failed to add %s helper %d", helper, err); 1395 goto err_free_ct; 1396 } 1397 } 1398 1399 err = ovs_nla_add_action(sfa, OVS_ACTION_ATTR_CT, &ct_info, 1400 sizeof(ct_info), log); 1401 if (err) 1402 goto err_free_ct; 1403 1404 if (ct_info.commit) 1405 __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status); 1406 return 0; 1407 err_free_ct: 1408 __ovs_ct_free_action(&ct_info); 1409 return err; 1410 } 1411 1412 #if IS_ENABLED(CONFIG_NF_NAT) 1413 static bool ovs_ct_nat_to_attr(const struct ovs_conntrack_info *info, 1414 struct sk_buff *skb) 1415 { 1416 struct nlattr *start; 1417 1418 start = nla_nest_start_noflag(skb, OVS_CT_ATTR_NAT); 1419 if (!start) 1420 return false; 1421 1422 if (info->nat & OVS_CT_SRC_NAT) { 1423 if (nla_put_flag(skb, OVS_NAT_ATTR_SRC)) 1424 return false; 1425 } else if (info->nat & OVS_CT_DST_NAT) { 1426 if (nla_put_flag(skb, OVS_NAT_ATTR_DST)) 1427 return false; 1428 } else { 1429 goto out; 1430 } 1431 1432 if (info->range.flags & NF_NAT_RANGE_MAP_IPS) { 1433 if (IS_ENABLED(CONFIG_NF_NAT) && 1434 info->family == NFPROTO_IPV4) { 1435 if (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MIN, 1436 info->range.min_addr.ip) || 1437 (info->range.max_addr.ip 1438 != info->range.min_addr.ip && 1439 (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MAX, 1440 info->range.max_addr.ip)))) 1441 return false; 1442 } else if (IS_ENABLED(CONFIG_IPV6) && 1443 info->family == NFPROTO_IPV6) { 1444 if (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MIN, 1445 &info->range.min_addr.in6) || 1446 (memcmp(&info->range.max_addr.in6, 1447 &info->range.min_addr.in6, 1448 sizeof(info->range.max_addr.in6)) && 1449 (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MAX, 1450 &info->range.max_addr.in6)))) 1451 return false; 1452 } else { 1453 return false; 1454 } 1455 } 1456 if (info->range.flags & NF_NAT_RANGE_PROTO_SPECIFIED && 1457 (nla_put_u16(skb, OVS_NAT_ATTR_PROTO_MIN, 1458 ntohs(info->range.min_proto.all)) || 1459 (info->range.max_proto.all != info->range.min_proto.all && 1460 nla_put_u16(skb, OVS_NAT_ATTR_PROTO_MAX, 1461 ntohs(info->range.max_proto.all))))) 1462 return false; 1463 1464 if (info->range.flags & NF_NAT_RANGE_PERSISTENT && 1465 nla_put_flag(skb, OVS_NAT_ATTR_PERSISTENT)) 1466 return false; 1467 if (info->range.flags & NF_NAT_RANGE_PROTO_RANDOM && 1468 nla_put_flag(skb, OVS_NAT_ATTR_PROTO_HASH)) 1469 return false; 1470 if (info->range.flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY && 1471 nla_put_flag(skb, OVS_NAT_ATTR_PROTO_RANDOM)) 1472 return false; 1473 out: 1474 nla_nest_end(skb, start); 1475 1476 return true; 1477 } 1478 #endif 1479 1480 int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, 1481 struct sk_buff *skb) 1482 { 1483 struct nlattr *start; 1484 1485 start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_CT); 1486 if (!start) 1487 return -EMSGSIZE; 1488 1489 if (ct_info->commit && nla_put_flag(skb, ct_info->force 1490 ? OVS_CT_ATTR_FORCE_COMMIT 1491 : OVS_CT_ATTR_COMMIT)) 1492 return -EMSGSIZE; 1493 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && 1494 nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id)) 1495 return -EMSGSIZE; 1496 if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && ct_info->mark.mask && 1497 nla_put(skb, OVS_CT_ATTR_MARK, sizeof(ct_info->mark), 1498 &ct_info->mark)) 1499 return -EMSGSIZE; 1500 if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && 1501 labels_nonzero(&ct_info->labels.mask) && 1502 nla_put(skb, OVS_CT_ATTR_LABELS, sizeof(ct_info->labels), 1503 &ct_info->labels)) 1504 return -EMSGSIZE; 1505 if (ct_info->helper) { 1506 if (nla_put_string(skb, OVS_CT_ATTR_HELPER, 1507 ct_info->helper->name)) 1508 return -EMSGSIZE; 1509 } 1510 if (ct_info->have_eventmask && 1511 nla_put_u32(skb, OVS_CT_ATTR_EVENTMASK, ct_info->eventmask)) 1512 return -EMSGSIZE; 1513 if (ct_info->timeout[0]) { 1514 if (nla_put_string(skb, OVS_CT_ATTR_TIMEOUT, ct_info->timeout)) 1515 return -EMSGSIZE; 1516 } 1517 1518 #if IS_ENABLED(CONFIG_NF_NAT) 1519 if (ct_info->nat && !ovs_ct_nat_to_attr(ct_info, skb)) 1520 return -EMSGSIZE; 1521 #endif 1522 nla_nest_end(skb, start); 1523 1524 return 0; 1525 } 1526 1527 void ovs_ct_free_action(const struct nlattr *a) 1528 { 1529 struct ovs_conntrack_info *ct_info = nla_data(a); 1530 1531 __ovs_ct_free_action(ct_info); 1532 } 1533 1534 static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info) 1535 { 1536 if (ct_info->helper) { 1537 #if IS_ENABLED(CONFIG_NF_NAT) 1538 if (ct_info->nat) 1539 nf_nat_helper_put(ct_info->helper); 1540 #endif 1541 nf_conntrack_helper_put(ct_info->helper); 1542 } 1543 if (ct_info->ct) { 1544 if (ct_info->timeout[0]) 1545 nf_ct_destroy_timeout(ct_info->ct); 1546 nf_ct_tmpl_free(ct_info->ct); 1547 } 1548 } 1549 1550 #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) 1551 static int ovs_ct_limit_init(struct net *net, struct ovs_net *ovs_net) 1552 { 1553 int i, err; 1554 1555 ovs_net->ct_limit_info = kmalloc(sizeof(*ovs_net->ct_limit_info), 1556 GFP_KERNEL); 1557 if (!ovs_net->ct_limit_info) 1558 return -ENOMEM; 1559 1560 ovs_net->ct_limit_info->default_limit = OVS_CT_LIMIT_DEFAULT; 1561 ovs_net->ct_limit_info->limits = 1562 kmalloc_array(CT_LIMIT_HASH_BUCKETS, sizeof(struct hlist_head), 1563 GFP_KERNEL); 1564 if (!ovs_net->ct_limit_info->limits) { 1565 kfree(ovs_net->ct_limit_info); 1566 return -ENOMEM; 1567 } 1568 1569 for (i = 0; i < CT_LIMIT_HASH_BUCKETS; i++) 1570 INIT_HLIST_HEAD(&ovs_net->ct_limit_info->limits[i]); 1571 1572 ovs_net->ct_limit_info->data = 1573 nf_conncount_init(net, NFPROTO_INET, sizeof(u32)); 1574 1575 if (IS_ERR(ovs_net->ct_limit_info->data)) { 1576 err = PTR_ERR(ovs_net->ct_limit_info->data); 1577 kfree(ovs_net->ct_limit_info->limits); 1578 kfree(ovs_net->ct_limit_info); 1579 pr_err("openvswitch: failed to init nf_conncount %d\n", err); 1580 return err; 1581 } 1582 return 0; 1583 } 1584 1585 static void ovs_ct_limit_exit(struct net *net, struct ovs_net *ovs_net) 1586 { 1587 const struct ovs_ct_limit_info *info = ovs_net->ct_limit_info; 1588 int i; 1589 1590 nf_conncount_destroy(net, NFPROTO_INET, info->data); 1591 for (i = 0; i < CT_LIMIT_HASH_BUCKETS; ++i) { 1592 struct hlist_head *head = &info->limits[i]; 1593 struct ovs_ct_limit *ct_limit; 1594 1595 hlist_for_each_entry_rcu(ct_limit, head, hlist_node, 1596 lockdep_ovsl_is_held()) 1597 kfree_rcu(ct_limit, rcu); 1598 } 1599 kfree(info->limits); 1600 kfree(info); 1601 } 1602 1603 static struct sk_buff * 1604 ovs_ct_limit_cmd_reply_start(struct genl_info *info, u8 cmd, 1605 struct ovs_header **ovs_reply_header) 1606 { 1607 struct ovs_header *ovs_header = info->userhdr; 1608 struct sk_buff *skb; 1609 1610 skb = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1611 if (!skb) 1612 return ERR_PTR(-ENOMEM); 1613 1614 *ovs_reply_header = genlmsg_put(skb, info->snd_portid, 1615 info->snd_seq, 1616 &dp_ct_limit_genl_family, 0, cmd); 1617 1618 if (!*ovs_reply_header) { 1619 nlmsg_free(skb); 1620 return ERR_PTR(-EMSGSIZE); 1621 } 1622 (*ovs_reply_header)->dp_ifindex = ovs_header->dp_ifindex; 1623 1624 return skb; 1625 } 1626 1627 static bool check_zone_id(int zone_id, u16 *pzone) 1628 { 1629 if (zone_id >= 0 && zone_id <= 65535) { 1630 *pzone = (u16)zone_id; 1631 return true; 1632 } 1633 return false; 1634 } 1635 1636 static int ovs_ct_limit_set_zone_limit(struct nlattr *nla_zone_limit, 1637 struct ovs_ct_limit_info *info) 1638 { 1639 struct ovs_zone_limit *zone_limit; 1640 int rem; 1641 u16 zone; 1642 1643 rem = NLA_ALIGN(nla_len(nla_zone_limit)); 1644 zone_limit = (struct ovs_zone_limit *)nla_data(nla_zone_limit); 1645 1646 while (rem >= sizeof(*zone_limit)) { 1647 if (unlikely(zone_limit->zone_id == 1648 OVS_ZONE_LIMIT_DEFAULT_ZONE)) { 1649 ovs_lock(); 1650 info->default_limit = zone_limit->limit; 1651 ovs_unlock(); 1652 } else if (unlikely(!check_zone_id( 1653 zone_limit->zone_id, &zone))) { 1654 OVS_NLERR(true, "zone id is out of range"); 1655 } else { 1656 struct ovs_ct_limit *ct_limit; 1657 1658 ct_limit = kmalloc(sizeof(*ct_limit), 1659 GFP_KERNEL_ACCOUNT); 1660 if (!ct_limit) 1661 return -ENOMEM; 1662 1663 ct_limit->zone = zone; 1664 ct_limit->limit = zone_limit->limit; 1665 1666 ovs_lock(); 1667 ct_limit_set(info, ct_limit); 1668 ovs_unlock(); 1669 } 1670 rem -= NLA_ALIGN(sizeof(*zone_limit)); 1671 zone_limit = (struct ovs_zone_limit *)((u8 *)zone_limit + 1672 NLA_ALIGN(sizeof(*zone_limit))); 1673 } 1674 1675 if (rem) 1676 OVS_NLERR(true, "set zone limit has %d unknown bytes", rem); 1677 1678 return 0; 1679 } 1680 1681 static int ovs_ct_limit_del_zone_limit(struct nlattr *nla_zone_limit, 1682 struct ovs_ct_limit_info *info) 1683 { 1684 struct ovs_zone_limit *zone_limit; 1685 int rem; 1686 u16 zone; 1687 1688 rem = NLA_ALIGN(nla_len(nla_zone_limit)); 1689 zone_limit = (struct ovs_zone_limit *)nla_data(nla_zone_limit); 1690 1691 while (rem >= sizeof(*zone_limit)) { 1692 if (unlikely(zone_limit->zone_id == 1693 OVS_ZONE_LIMIT_DEFAULT_ZONE)) { 1694 ovs_lock(); 1695 info->default_limit = OVS_CT_LIMIT_DEFAULT; 1696 ovs_unlock(); 1697 } else if (unlikely(!check_zone_id( 1698 zone_limit->zone_id, &zone))) { 1699 OVS_NLERR(true, "zone id is out of range"); 1700 } else { 1701 ovs_lock(); 1702 ct_limit_del(info, zone); 1703 ovs_unlock(); 1704 } 1705 rem -= NLA_ALIGN(sizeof(*zone_limit)); 1706 zone_limit = (struct ovs_zone_limit *)((u8 *)zone_limit + 1707 NLA_ALIGN(sizeof(*zone_limit))); 1708 } 1709 1710 if (rem) 1711 OVS_NLERR(true, "del zone limit has %d unknown bytes", rem); 1712 1713 return 0; 1714 } 1715 1716 static int ovs_ct_limit_get_default_limit(struct ovs_ct_limit_info *info, 1717 struct sk_buff *reply) 1718 { 1719 struct ovs_zone_limit zone_limit = { 1720 .zone_id = OVS_ZONE_LIMIT_DEFAULT_ZONE, 1721 .limit = info->default_limit, 1722 }; 1723 1724 return nla_put_nohdr(reply, sizeof(zone_limit), &zone_limit); 1725 } 1726 1727 static int __ovs_ct_limit_get_zone_limit(struct net *net, 1728 struct nf_conncount_data *data, 1729 u16 zone_id, u32 limit, 1730 struct sk_buff *reply) 1731 { 1732 struct nf_conntrack_zone ct_zone; 1733 struct ovs_zone_limit zone_limit; 1734 u32 conncount_key = zone_id; 1735 1736 zone_limit.zone_id = zone_id; 1737 zone_limit.limit = limit; 1738 nf_ct_zone_init(&ct_zone, zone_id, NF_CT_DEFAULT_ZONE_DIR, 0); 1739 1740 zone_limit.count = nf_conncount_count(net, data, &conncount_key, NULL, 1741 &ct_zone); 1742 return nla_put_nohdr(reply, sizeof(zone_limit), &zone_limit); 1743 } 1744 1745 static int ovs_ct_limit_get_zone_limit(struct net *net, 1746 struct nlattr *nla_zone_limit, 1747 struct ovs_ct_limit_info *info, 1748 struct sk_buff *reply) 1749 { 1750 struct ovs_zone_limit *zone_limit; 1751 int rem, err; 1752 u32 limit; 1753 u16 zone; 1754 1755 rem = NLA_ALIGN(nla_len(nla_zone_limit)); 1756 zone_limit = (struct ovs_zone_limit *)nla_data(nla_zone_limit); 1757 1758 while (rem >= sizeof(*zone_limit)) { 1759 if (unlikely(zone_limit->zone_id == 1760 OVS_ZONE_LIMIT_DEFAULT_ZONE)) { 1761 err = ovs_ct_limit_get_default_limit(info, reply); 1762 if (err) 1763 return err; 1764 } else if (unlikely(!check_zone_id(zone_limit->zone_id, 1765 &zone))) { 1766 OVS_NLERR(true, "zone id is out of range"); 1767 } else { 1768 rcu_read_lock(); 1769 limit = ct_limit_get(info, zone); 1770 rcu_read_unlock(); 1771 1772 err = __ovs_ct_limit_get_zone_limit( 1773 net, info->data, zone, limit, reply); 1774 if (err) 1775 return err; 1776 } 1777 rem -= NLA_ALIGN(sizeof(*zone_limit)); 1778 zone_limit = (struct ovs_zone_limit *)((u8 *)zone_limit + 1779 NLA_ALIGN(sizeof(*zone_limit))); 1780 } 1781 1782 if (rem) 1783 OVS_NLERR(true, "get zone limit has %d unknown bytes", rem); 1784 1785 return 0; 1786 } 1787 1788 static int ovs_ct_limit_get_all_zone_limit(struct net *net, 1789 struct ovs_ct_limit_info *info, 1790 struct sk_buff *reply) 1791 { 1792 struct ovs_ct_limit *ct_limit; 1793 struct hlist_head *head; 1794 int i, err = 0; 1795 1796 err = ovs_ct_limit_get_default_limit(info, reply); 1797 if (err) 1798 return err; 1799 1800 rcu_read_lock(); 1801 for (i = 0; i < CT_LIMIT_HASH_BUCKETS; ++i) { 1802 head = &info->limits[i]; 1803 hlist_for_each_entry_rcu(ct_limit, head, hlist_node) { 1804 err = __ovs_ct_limit_get_zone_limit(net, info->data, 1805 ct_limit->zone, ct_limit->limit, reply); 1806 if (err) 1807 goto exit_err; 1808 } 1809 } 1810 1811 exit_err: 1812 rcu_read_unlock(); 1813 return err; 1814 } 1815 1816 static int ovs_ct_limit_cmd_set(struct sk_buff *skb, struct genl_info *info) 1817 { 1818 struct nlattr **a = info->attrs; 1819 struct sk_buff *reply; 1820 struct ovs_header *ovs_reply_header; 1821 struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id); 1822 struct ovs_ct_limit_info *ct_limit_info = ovs_net->ct_limit_info; 1823 int err; 1824 1825 reply = ovs_ct_limit_cmd_reply_start(info, OVS_CT_LIMIT_CMD_SET, 1826 &ovs_reply_header); 1827 if (IS_ERR(reply)) 1828 return PTR_ERR(reply); 1829 1830 if (!a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]) { 1831 err = -EINVAL; 1832 goto exit_err; 1833 } 1834 1835 err = ovs_ct_limit_set_zone_limit(a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT], 1836 ct_limit_info); 1837 if (err) 1838 goto exit_err; 1839 1840 static_branch_enable(&ovs_ct_limit_enabled); 1841 1842 genlmsg_end(reply, ovs_reply_header); 1843 return genlmsg_reply(reply, info); 1844 1845 exit_err: 1846 nlmsg_free(reply); 1847 return err; 1848 } 1849 1850 static int ovs_ct_limit_cmd_del(struct sk_buff *skb, struct genl_info *info) 1851 { 1852 struct nlattr **a = info->attrs; 1853 struct sk_buff *reply; 1854 struct ovs_header *ovs_reply_header; 1855 struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id); 1856 struct ovs_ct_limit_info *ct_limit_info = ovs_net->ct_limit_info; 1857 int err; 1858 1859 reply = ovs_ct_limit_cmd_reply_start(info, OVS_CT_LIMIT_CMD_DEL, 1860 &ovs_reply_header); 1861 if (IS_ERR(reply)) 1862 return PTR_ERR(reply); 1863 1864 if (!a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]) { 1865 err = -EINVAL; 1866 goto exit_err; 1867 } 1868 1869 err = ovs_ct_limit_del_zone_limit(a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT], 1870 ct_limit_info); 1871 if (err) 1872 goto exit_err; 1873 1874 genlmsg_end(reply, ovs_reply_header); 1875 return genlmsg_reply(reply, info); 1876 1877 exit_err: 1878 nlmsg_free(reply); 1879 return err; 1880 } 1881 1882 static int ovs_ct_limit_cmd_get(struct sk_buff *skb, struct genl_info *info) 1883 { 1884 struct nlattr **a = info->attrs; 1885 struct nlattr *nla_reply; 1886 struct sk_buff *reply; 1887 struct ovs_header *ovs_reply_header; 1888 struct net *net = sock_net(skb->sk); 1889 struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 1890 struct ovs_ct_limit_info *ct_limit_info = ovs_net->ct_limit_info; 1891 int err; 1892 1893 reply = ovs_ct_limit_cmd_reply_start(info, OVS_CT_LIMIT_CMD_GET, 1894 &ovs_reply_header); 1895 if (IS_ERR(reply)) 1896 return PTR_ERR(reply); 1897 1898 nla_reply = nla_nest_start_noflag(reply, OVS_CT_LIMIT_ATTR_ZONE_LIMIT); 1899 if (!nla_reply) { 1900 err = -EMSGSIZE; 1901 goto exit_err; 1902 } 1903 1904 if (a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]) { 1905 err = ovs_ct_limit_get_zone_limit( 1906 net, a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT], ct_limit_info, 1907 reply); 1908 if (err) 1909 goto exit_err; 1910 } else { 1911 err = ovs_ct_limit_get_all_zone_limit(net, ct_limit_info, 1912 reply); 1913 if (err) 1914 goto exit_err; 1915 } 1916 1917 nla_nest_end(reply, nla_reply); 1918 genlmsg_end(reply, ovs_reply_header); 1919 return genlmsg_reply(reply, info); 1920 1921 exit_err: 1922 nlmsg_free(reply); 1923 return err; 1924 } 1925 1926 static const struct genl_small_ops ct_limit_genl_ops[] = { 1927 { .cmd = OVS_CT_LIMIT_CMD_SET, 1928 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 1929 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN 1930 * privilege. 1931 */ 1932 .doit = ovs_ct_limit_cmd_set, 1933 }, 1934 { .cmd = OVS_CT_LIMIT_CMD_DEL, 1935 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 1936 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN 1937 * privilege. 1938 */ 1939 .doit = ovs_ct_limit_cmd_del, 1940 }, 1941 { .cmd = OVS_CT_LIMIT_CMD_GET, 1942 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 1943 .flags = 0, /* OK for unprivileged users. */ 1944 .doit = ovs_ct_limit_cmd_get, 1945 }, 1946 }; 1947 1948 static const struct genl_multicast_group ovs_ct_limit_multicast_group = { 1949 .name = OVS_CT_LIMIT_MCGROUP, 1950 }; 1951 1952 struct genl_family dp_ct_limit_genl_family __ro_after_init = { 1953 .hdrsize = sizeof(struct ovs_header), 1954 .name = OVS_CT_LIMIT_FAMILY, 1955 .version = OVS_CT_LIMIT_VERSION, 1956 .maxattr = OVS_CT_LIMIT_ATTR_MAX, 1957 .policy = ct_limit_policy, 1958 .netnsok = true, 1959 .parallel_ops = true, 1960 .small_ops = ct_limit_genl_ops, 1961 .n_small_ops = ARRAY_SIZE(ct_limit_genl_ops), 1962 .resv_start_op = OVS_CT_LIMIT_CMD_GET + 1, 1963 .mcgrps = &ovs_ct_limit_multicast_group, 1964 .n_mcgrps = 1, 1965 .module = THIS_MODULE, 1966 }; 1967 #endif 1968 1969 int ovs_ct_init(struct net *net) 1970 { 1971 unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE; 1972 struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 1973 1974 if (nf_connlabels_get(net, n_bits - 1)) { 1975 ovs_net->xt_label = false; 1976 OVS_NLERR(true, "Failed to set connlabel length"); 1977 } else { 1978 ovs_net->xt_label = true; 1979 } 1980 1981 #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) 1982 return ovs_ct_limit_init(net, ovs_net); 1983 #else 1984 return 0; 1985 #endif 1986 } 1987 1988 void ovs_ct_exit(struct net *net) 1989 { 1990 struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 1991 1992 #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) 1993 ovs_ct_limit_exit(net, ovs_net); 1994 #endif 1995 1996 if (ovs_net->xt_label) 1997 nf_connlabels_put(net); 1998 } 1999