// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2015 Nicira, Inc.
 */

#include <linux/module.h>
#include <linux/openvswitch.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/sctp.h>
#include <linux/static_key.h>
#include <linux/string_helpers.h>
#include <net/ip.h>
#include <net/genetlink.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_count.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_labels.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_timeout.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#include <net/ipv6_frag.h>

#if IS_ENABLED(CONFIG_NF_NAT)
#include <net/netfilter/nf_nat.h>
#endif

#include <net/netfilter/nf_conntrack_act_ct.h>

#include "datapath.h"
#include "drop.h"
#include "conntrack.h"
#include "flow.h"
#include "flow_netlink.h"

struct ovs_ct_len_tbl {
	int maxlen;
	int minlen;
};

/* Metadata mark for masked write to conntrack mark */
struct md_mark {
	u32 value;
	u32 mask;
};

/* Metadata label for masked write to conntrack label. */
struct md_labels {
	struct ovs_key_ct_labels value;
	struct ovs_key_ct_labels mask;
};

enum ovs_ct_nat {
	OVS_CT_NAT = 1 << 0,     /* NAT for committed connections only. */
	OVS_CT_SRC_NAT = 1 << 1, /* Source NAT for NEW connections. */
	OVS_CT_DST_NAT = 1 << 2, /* Destination NAT for NEW connections. */
};

/* Conntrack action context for execution. */
struct ovs_conntrack_info {
	struct nf_conntrack_helper *helper;
	struct nf_conntrack_zone zone;
	struct nf_conn *ct;
	u8 commit : 1;
	u8 nat : 3;		/* enum ovs_ct_nat */
	u8 force : 1;
	u8 have_eventmask : 1;
	u16 family;
	u32 eventmask;		/* Mask of 1 << IPCT_*. */
	struct md_mark mark;
	struct md_labels labels;
	char timeout[CTNL_TIMEOUT_NAME_MAX];
	struct nf_ct_timeout *nf_ct_timeout;
#if IS_ENABLED(CONFIG_NF_NAT)
	struct nf_nat_range2 range;	/* Only present for SRC NAT and DST NAT. */
#endif
};

#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
#define OVS_CT_LIMIT_UNLIMITED	0
#define OVS_CT_LIMIT_DEFAULT	OVS_CT_LIMIT_UNLIMITED
#define CT_LIMIT_HASH_BUCKETS	512
static DEFINE_STATIC_KEY_FALSE(ovs_ct_limit_enabled);

struct ovs_ct_limit {
	/* Elements in ovs_ct_limit_info->limits hash table */
	struct hlist_node hlist_node;
	struct rcu_head rcu;
	u16 zone;
	u32 limit;
};

struct ovs_ct_limit_info {
	u32 default_limit;
	struct hlist_head *limits;
	struct nf_conncount_data *data;
};

static const struct nla_policy ct_limit_policy[OVS_CT_LIMIT_ATTR_MAX + 1] = {
	[OVS_CT_LIMIT_ATTR_ZONE_LIMIT] = { .type = NLA_NESTED, },
};
#endif

static bool labels_nonzero(const struct ovs_key_ct_labels *labels);

static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info);

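/* Map the flow key's EtherType to a netfilter protocol family.
 * NFPROTO_UNSPEC tells callers that conntrack cannot handle this
 * packet type.
 */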
static u16 key_to_nfproto(const struct sw_flow_key *key)
{
	switch (ntohs(key->eth.type)) {
	case ETH_P_IP:
		return NFPROTO_IPV4;
	case ETH_P_IPV6:
		return NFPROTO_IPV6;
	default:
		return NFPROTO_UNSPEC;
	}
}

/* Map SKB connection state into the values used by flow definition. */
static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo)
{
	u8 ct_state = OVS_CS_F_TRACKED;

	switch (ctinfo) {
	case IP_CT_ESTABLISHED_REPLY:
	case IP_CT_RELATED_REPLY:
		ct_state |= OVS_CS_F_REPLY_DIR;
		break;
	default:
		break;
	}

	switch (ctinfo) {
	case IP_CT_ESTABLISHED:
	case IP_CT_ESTABLISHED_REPLY:
		ct_state |= OVS_CS_F_ESTABLISHED;
		break;
	case IP_CT_RELATED:
	case IP_CT_RELATED_REPLY:
		ct_state |= OVS_CS_F_RELATED;
		break;
	case IP_CT_NEW:
		ct_state |= OVS_CS_F_NEW;
		break;
	default:
		break;
	}

	return ct_state;
}

static u32 ovs_ct_get_mark(const struct nf_conn *ct)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
	return ct ? READ_ONCE(ct->mark) : 0;
#else
	return 0;
#endif
}

/* Guard against conntrack labels max size shrinking below 128 bits. */
#if NF_CT_LABELS_MAX_SIZE < 16
#error NF_CT_LABELS_MAX_SIZE must be at least 16 bytes
#endif

static void ovs_ct_get_labels(const struct nf_conn *ct,
			      struct ovs_key_ct_labels *labels)
{
	struct nf_conn_labels *cl = NULL;

	if (ct) {
		if (ct->master && !nf_ct_is_confirmed(ct))
			ct = ct->master;
		cl = nf_ct_labels_find(ct);
	}
	if (cl)
		memcpy(labels, cl->bits, OVS_CT_LABELS_LEN);
	else
		memset(labels, 0, OVS_CT_LABELS_LEN);
}

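/* ICMP and ICMPv6 carry no ports; conntrack stores the message type and
 * code in the tuple instead, so expose those through the same
 * transport-port key fields.
 */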
static void __ovs_ct_update_key_orig_tp(struct sw_flow_key *key,
					const struct nf_conntrack_tuple *orig,
					u8 icmp_proto)
{
	key->ct_orig_proto = orig->dst.protonum;
	if (orig->dst.protonum == icmp_proto) {
		key->ct.orig_tp.src = htons(orig->dst.u.icmp.type);
		key->ct.orig_tp.dst = htons(orig->dst.u.icmp.code);
	} else {
		key->ct.orig_tp.src = orig->src.u.all;
		key->ct.orig_tp.dst = orig->dst.u.all;
	}
}

static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state,
				const struct nf_conntrack_zone *zone,
				const struct nf_conn *ct)
{
	key->ct_state = state;
	key->ct_zone = zone->id;
	key->ct.mark = ovs_ct_get_mark(ct);
	ovs_ct_get_labels(ct, &key->ct.labels);

	if (ct) {
		const struct nf_conntrack_tuple *orig;

		/* Use the master if we have one. */
		if (ct->master)
			ct = ct->master;
		orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;

		/* IP version must match with the master connection. */
		if (key->eth.type == htons(ETH_P_IP) &&
		    nf_ct_l3num(ct) == NFPROTO_IPV4) {
			key->ipv4.ct_orig.src = orig->src.u3.ip;
			key->ipv4.ct_orig.dst = orig->dst.u3.ip;
			__ovs_ct_update_key_orig_tp(key, orig, IPPROTO_ICMP);
			return;
		} else if (key->eth.type == htons(ETH_P_IPV6) &&
			   !sw_flow_key_is_nd(key) &&
			   nf_ct_l3num(ct) == NFPROTO_IPV6) {
			key->ipv6.ct_orig.src = orig->src.u3.in6;
			key->ipv6.ct_orig.dst = orig->dst.u3.in6;
			__ovs_ct_update_key_orig_tp(key, orig, NEXTHDR_ICMP);
			return;
		}
	}
	/* Clear 'ct_orig_proto' to mark the non-existence of conntrack
	 * original direction key fields.
	 */
	key->ct_orig_proto = 0;
}

/* Update 'key' based on skb->_nfct. If 'post_ct' is true, then OVS has
 * previously sent the packet to conntrack via the ct action. If
 * 'keep_nat_flags' is true, the existing NAT flags are retained, else they
 * are initialized from the connection status.
 */
static void ovs_ct_update_key(const struct sk_buff *skb,
			      const struct ovs_conntrack_info *info,
			      struct sw_flow_key *key, bool post_ct,
			      bool keep_nat_flags)
{
	const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;
	u8 state = 0;

	ct = nf_ct_get(skb, &ctinfo);
	if (ct) {
		state = ovs_ct_get_state(ctinfo);
		/* All unconfirmed entries are NEW connections. */
		if (!nf_ct_is_confirmed(ct))
			state |= OVS_CS_F_NEW;
		/* OVS persists the related flag for the duration of the
		 * connection.
		 */
		if (ct->master)
			state |= OVS_CS_F_RELATED;
		if (keep_nat_flags) {
			state |= key->ct_state & OVS_CS_F_NAT_MASK;
		} else {
			if (ct->status & IPS_SRC_NAT)
				state |= OVS_CS_F_SRC_NAT;
			if (ct->status & IPS_DST_NAT)
				state |= OVS_CS_F_DST_NAT;
		}
		zone = nf_ct_zone(ct);
	} else if (post_ct) {
		state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID;
		if (info)
			zone = &info->zone;
	}
	__ovs_ct_update_key(key, state, zone, ct);
}

/* This is called to initialize CT key fields possibly coming in from the local
 * stack.
 */
void ovs_ct_fill_key(const struct sk_buff *skb,
		     struct sw_flow_key *key,
		     bool post_ct)
{
	ovs_ct_update_key(skb, NULL, key, post_ct, false);
}

int ovs_ct_put_key(const struct sw_flow_key *swkey,
		   const struct sw_flow_key *output, struct sk_buff *skb)
{
	if (nla_put_u32(skb, OVS_KEY_ATTR_CT_STATE, output->ct_state))
		return -EMSGSIZE;

	if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
	    nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, output->ct_zone))
		return -EMSGSIZE;

	if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
	    nla_put_u32(skb, OVS_KEY_ATTR_CT_MARK, output->ct.mark))
		return -EMSGSIZE;

	if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
	    nla_put(skb, OVS_KEY_ATTR_CT_LABELS, sizeof(output->ct.labels),
		    &output->ct.labels))
		return -EMSGSIZE;

	if (swkey->ct_orig_proto) {
		if (swkey->eth.type == htons(ETH_P_IP)) {
			struct ovs_key_ct_tuple_ipv4 orig;

			memset(&orig, 0, sizeof(orig));
			orig.ipv4_src = output->ipv4.ct_orig.src;
			orig.ipv4_dst = output->ipv4.ct_orig.dst;
			orig.src_port = output->ct.orig_tp.src;
			orig.dst_port = output->ct.orig_tp.dst;
			orig.ipv4_proto = output->ct_orig_proto;

			if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4,
				    sizeof(orig), &orig))
				return -EMSGSIZE;
		} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
			struct ovs_key_ct_tuple_ipv6 orig;

			memset(&orig, 0, sizeof(orig));
			memcpy(orig.ipv6_src, output->ipv6.ct_orig.src.s6_addr32,
			       sizeof(orig.ipv6_src));
			memcpy(orig.ipv6_dst, output->ipv6.ct_orig.dst.s6_addr32,
			       sizeof(orig.ipv6_dst));
			orig.src_port = output->ct.orig_tp.src;
			orig.dst_port = output->ct.orig_tp.dst;
			orig.ipv6_proto = output->ct_orig_proto;

			if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6,
				    sizeof(orig), &orig))
				return -EMSGSIZE;
		}
	}

	return 0;
}

static int ovs_ct_set_mark(struct nf_conn *ct, struct sw_flow_key *key,
			   u32 ct_mark, u32 mask)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
	u32 new_mark;

	new_mark = ct_mark | (READ_ONCE(ct->mark) & ~(mask));
	if (READ_ONCE(ct->mark) != new_mark) {
		WRITE_ONCE(ct->mark, new_mark);
		if (nf_ct_is_confirmed(ct))
			nf_conntrack_event_cache(IPCT_MARK, ct);
		key->ct.mark = new_mark;
	}

	return 0;
#else
	return -ENOTSUPP;
#endif
}

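/* Find the conntrack labels extension for this connection, adding it
 * first if the connection does not have one yet.
 */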
static struct nf_conn_labels *ovs_ct_get_conn_labels(struct nf_conn *ct)
{
	struct nf_conn_labels *cl;

	cl = nf_ct_labels_find(ct);
	if (!cl) {
		nf_ct_labels_ext_add(ct);
		cl = nf_ct_labels_find(ct);
	}

	return cl;
}

/* Initialize labels for a new, yet to be committed conntrack entry. Note that
 * since the new connection is not yet confirmed, and thus no-one else has
 * access to its labels, we simply write them over.
 */
static int ovs_ct_init_labels(struct nf_conn *ct, struct sw_flow_key *key,
			      const struct ovs_key_ct_labels *labels,
			      const struct ovs_key_ct_labels *mask)
{
	struct nf_conn_labels *cl, *master_cl;
	bool have_mask = labels_nonzero(mask);

	/* Inherit master's labels to the related connection? */
	master_cl = ct->master ? nf_ct_labels_find(ct->master) : NULL;

	if (!master_cl && !have_mask)
		return 0;	/* Nothing to do. */

	cl = ovs_ct_get_conn_labels(ct);
	if (!cl)
		return -ENOSPC;

	/* Inherit the master's labels, if any. */
	if (master_cl)
		*cl = *master_cl;

	if (have_mask) {
		u32 *dst = (u32 *)cl->bits;
		int i;

		for (i = 0; i < OVS_CT_LABELS_LEN_32; i++)
			dst[i] = (dst[i] & ~mask->ct_labels_32[i]) |
				 (labels->ct_labels_32[i]
				  & mask->ct_labels_32[i]);
	}

	/* Labels are included in the IPCTNL_MSG_CT_NEW event only if the
	 * IPCT_LABEL bit is set in the event cache.
	 */
	nf_conntrack_event_cache(IPCT_LABEL, ct);

	memcpy(&key->ct.labels, cl->bits, OVS_CT_LABELS_LEN);

	return 0;
}

static int ovs_ct_set_labels(struct nf_conn *ct, struct sw_flow_key *key,
			     const struct ovs_key_ct_labels *labels,
			     const struct ovs_key_ct_labels *mask)
{
	struct nf_conn_labels *cl;
	int err;

	cl = ovs_ct_get_conn_labels(ct);
	if (!cl)
		return -ENOSPC;

	err = nf_connlabels_replace(ct, labels->ct_labels_32,
				    mask->ct_labels_32,
				    OVS_CT_LABELS_LEN_32);
	if (err)
		return err;

	memcpy(&key->ct.labels, cl->bits, OVS_CT_LABELS_LEN);

	return 0;
}

static int ovs_ct_handle_fragments(struct net *net, struct sw_flow_key *key,
				   u16 zone, int family, struct sk_buff *skb)
{
	struct ovs_skb_cb ovs_cb = *OVS_CB(skb);
	int err;

	err = nf_ct_handle_fragments(net, skb, zone, family, &key->ip.proto, &ovs_cb.mru);
	if (err)
		return err;

	/* The key extracted from the fragment that completed this datagram
	 * likely didn't have an L4 header, so regenerate it.
	 */
	ovs_flow_key_update_l3l4(skb, key);
	key->ip.frag = OVS_FRAG_TYPE_NONE;
	*OVS_CB(skb) = ovs_cb;

	return 0;
}

/* This replicates logic from nf_conntrack_core.c that is not exported. */
static enum ip_conntrack_info
ovs_ct_get_info(const struct nf_conntrack_tuple_hash *h)
{
	const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);

	if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
		return IP_CT_ESTABLISHED_REPLY;
	/* Once we've had two way comms, always ESTABLISHED. */
	if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status))
		return IP_CT_ESTABLISHED;
	if (test_bit(IPS_EXPECTED_BIT, &ct->status))
		return IP_CT_RELATED;
	return IP_CT_NEW;
}

/* Find an existing connection which this packet belongs to without
 * re-attributing statistics or modifying the connection state. This allows an
 * skb->_nfct lost due to an upcall to be recovered during actions execution.
 *
 * Must be called with rcu_read_lock.
 *
 * On success, populates skb->_nfct and returns the connection. Returns NULL
 * if there is no existing entry.
 */
static struct nf_conn *
ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
		     u8 l3num, struct sk_buff *skb, bool natted)
{
	struct nf_conntrack_tuple tuple;
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;

	if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), l3num,
			       net, &tuple)) {
		pr_debug("ovs_ct_find_existing: Can't get tuple\n");
		return NULL;
	}

	/* Must invert the tuple if skb has been transformed by NAT. */
	if (natted) {
		struct nf_conntrack_tuple inverse;

		if (!nf_ct_invert_tuple(&inverse, &tuple)) {
			pr_debug("ovs_ct_find_existing: Inversion failed!\n");
			return NULL;
		}
		tuple = inverse;
	}

	/* look for tuple match */
	h = nf_conntrack_find_get(net, zone, &tuple);
	if (!h)
		return NULL;	/* Not found. */

	ct = nf_ct_tuplehash_to_ctrack(h);

	/* Inverted packet tuple matches the reverse direction conntrack tuple,
	 * select the other tuplehash to get the right 'ctinfo' bits for this
	 * packet.
	 */
	if (natted)
		h = &ct->tuplehash[!h->tuple.dst.dir];

	nf_ct_set(skb, ct, ovs_ct_get_info(h));
	return ct;
}

static
struct nf_conn *ovs_ct_executed(struct net *net,
				const struct sw_flow_key *key,
				const struct ovs_conntrack_info *info,
				struct sk_buff *skb,
				bool *ct_executed)
{
	struct nf_conn *ct = NULL;

	/* If no ct, check if we have evidence that an existing conntrack entry
	 * might be found for this skb. This happens when we lose a skb->_nfct
	 * due to an upcall, or if the direction is being forced. If the
	 * connection was not confirmed, it is not cached and needs to be run
	 * through conntrack again.
	 */
	*ct_executed = (key->ct_state & OVS_CS_F_TRACKED) &&
		       !(key->ct_state & OVS_CS_F_INVALID) &&
		       (key->ct_zone == info->zone.id);

	if (*ct_executed || (!key->ct_state && info->force)) {
		ct = ovs_ct_find_existing(net, &info->zone, info->family, skb,
					  !!(key->ct_state &
					     OVS_CS_F_NAT_MASK));
	}

	return ct;
}

/* Determine whether skb->_nfct is equal to the result of conntrack lookup. */
static bool skb_nfct_cached(struct net *net,
			    const struct sw_flow_key *key,
			    const struct ovs_conntrack_info *info,
			    struct sk_buff *skb)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;
	bool ct_executed = true;

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct)
		ct = ovs_ct_executed(net, key, info, skb, &ct_executed);

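	/* Re-read ctinfo: ovs_ct_executed() may have attached a recovered
	 * conntrack entry to the skb above.
	 */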
	if (ct)
		nf_ct_get(skb, &ctinfo);
	else
		return false;

	if (!net_eq(net, read_pnet(&ct->ct_net)))
		return false;
	if (!nf_ct_zone_equal_any(info->ct, nf_ct_zone(ct)))
		return false;
	if (info->helper) {
		struct nf_conn_help *help;

		help = nf_ct_ext_find(ct, NF_CT_EXT_HELPER);
		if (help && rcu_access_pointer(help->helper) != info->helper)
			return false;
	}
	if (info->nf_ct_timeout) {
		struct nf_conn_timeout *timeout_ext;

		timeout_ext = nf_ct_timeout_find(ct);
		if (!timeout_ext || info->nf_ct_timeout !=
		    rcu_dereference(timeout_ext->timeout))
			return false;
	}
	/* Force conntrack entry direction to the current packet? */
	if (info->force && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
		/* Delete the conntrack entry if confirmed, else just release
		 * the reference.
		 */
		if (nf_ct_is_confirmed(ct))
			nf_ct_delete(ct, 0, 0);

		nf_ct_put(ct);
		nf_ct_set(skb, NULL, 0);
		return false;
	}

	return ct_executed;
}

#if IS_ENABLED(CONFIG_NF_NAT)
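/* Update the flow key to match the packet headers that NAT just
 * rewrote, so subsequent actions and the flow match see the translated
 * addresses and ports.
 */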
static void ovs_nat_update_key(struct sw_flow_key *key,
			       const struct sk_buff *skb,
			       enum nf_nat_manip_type maniptype)
{
	if (maniptype == NF_NAT_MANIP_SRC) {
		__be16 src;

		key->ct_state |= OVS_CS_F_SRC_NAT;
		if (key->eth.type == htons(ETH_P_IP))
			key->ipv4.addr.src = ip_hdr(skb)->saddr;
		else if (key->eth.type == htons(ETH_P_IPV6))
			memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr,
			       sizeof(key->ipv6.addr.src));
		else
			return;

		if (key->ip.proto == IPPROTO_UDP)
			src = udp_hdr(skb)->source;
		else if (key->ip.proto == IPPROTO_TCP)
			src = tcp_hdr(skb)->source;
		else if (key->ip.proto == IPPROTO_SCTP)
			src = sctp_hdr(skb)->source;
		else
			return;

		key->tp.src = src;
	} else {
		__be16 dst;

		key->ct_state |= OVS_CS_F_DST_NAT;
		if (key->eth.type == htons(ETH_P_IP))
			key->ipv4.addr.dst = ip_hdr(skb)->daddr;
		else if (key->eth.type == htons(ETH_P_IPV6))
			memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr,
			       sizeof(key->ipv6.addr.dst));
		else
			return;

		if (key->ip.proto == IPPROTO_UDP)
			dst = udp_hdr(skb)->dest;
		else if (key->ip.proto == IPPROTO_TCP)
			dst = tcp_hdr(skb)->dest;
		else if (key->ip.proto == IPPROTO_SCTP)
			dst = sctp_hdr(skb)->dest;
		else
			return;

		key->tp.dst = dst;
	}
}

/* Returns NF_DROP if the packet should be dropped, NF_ACCEPT otherwise. */
static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
		      const struct ovs_conntrack_info *info,
		      struct sk_buff *skb, struct nf_conn *ct,
		      enum ip_conntrack_info ctinfo)
{
	int err, action = 0;

	if (!(info->nat & OVS_CT_NAT))
		return NF_ACCEPT;
	if (info->nat & OVS_CT_SRC_NAT)
		action |= BIT(NF_NAT_MANIP_SRC);
	if (info->nat & OVS_CT_DST_NAT)
		action |= BIT(NF_NAT_MANIP_DST);

	err = nf_ct_nat(skb, ct, ctinfo, &action, &info->range, info->commit);

	if (action & BIT(NF_NAT_MANIP_SRC))
		ovs_nat_update_key(key, skb, NF_NAT_MANIP_SRC);
	if (action & BIT(NF_NAT_MANIP_DST))
		ovs_nat_update_key(key, skb, NF_NAT_MANIP_DST);

	return err;
}
#else /* !CONFIG_NF_NAT */
static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
		      const struct ovs_conntrack_info *info,
		      struct sk_buff *skb, struct nf_conn *ct,
		      enum ip_conntrack_info ctinfo)
{
	return NF_ACCEPT;
}
#endif

/* Pass 'skb' through conntrack in 'net', using zone configured in 'info', if
 * not done already. Update key with new CT state after passing the packet
 * through conntrack.
 * Note that if the packet is deemed invalid by conntrack, skb->_nfct will be
 * set to NULL and 0 will be returned.
 */
static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
			   const struct ovs_conntrack_info *info,
			   struct sk_buff *skb)
{
	/* If we are recirculating packets to match on conntrack fields and
	 * committing with a separate conntrack action, then we don't need to
	 * actually run the packet through conntrack twice unless it's for a
	 * different zone.
	 */
	bool cached = skb_nfct_cached(net, key, info, skb);
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;

	if (!cached) {
		struct nf_hook_state state = {
			.hook = NF_INET_PRE_ROUTING,
			.pf = info->family,
			.net = net,
		};
		struct nf_conn *tmpl = info->ct;
		int err;

		/* Associate skb with specified zone. */
		if (tmpl) {
			ct = nf_ct_get(skb, &ctinfo);
			nf_ct_put(ct);
			nf_conntrack_get(&tmpl->ct_general);
			nf_ct_set(skb, tmpl, IP_CT_NEW);
		}

		err = nf_conntrack_in(skb, &state);
		if (err != NF_ACCEPT)
			return -ENOENT;

		/* Clear CT state NAT flags to mark that we have not yet done
		 * NAT after the nf_conntrack_in() call. We can actually clear
		 * the whole state, as it will be re-initialized below.
		 */
		key->ct_state = 0;

		/* Update the key, but keep the NAT flags. */
		ovs_ct_update_key(skb, info, key, true, true);
	}

	ct = nf_ct_get(skb, &ctinfo);
	if (ct) {
		bool add_helper = false;

		/* Packets starting a new connection must be NATted before the
		 * helper, so that the helper knows about the NAT. We enforce
		 * this by delaying both NAT and helper calls for unconfirmed
		 * connections until the committing CT action. For later
		 * packets NAT and Helper may be called in either order.
		 *
		 * NAT will be done only if the CT action has NAT, and only
		 * once per packet (per zone), as guarded by the NAT bits in
		 * the key->ct_state.
		 */
		if (info->nat && !(key->ct_state & OVS_CS_F_NAT_MASK) &&
		    (nf_ct_is_confirmed(ct) || info->commit) &&
		    ovs_ct_nat(net, key, info, skb, ct, ctinfo) != NF_ACCEPT) {
			return -EINVAL;
		}

		/* Userspace may decide to perform a ct lookup without a helper
		 * specified followed by a (recirculate and) commit with one,
		 * or attach a helper in a later commit. Therefore, for
		 * connections which we will commit, we may need to attach
		 * the helper here.
		 */
		if (!nf_ct_is_confirmed(ct) && info->commit &&
		    info->helper && !nfct_help(ct)) {
			int err = __nf_ct_try_assign_helper(ct, info->ct,
							    GFP_ATOMIC);
			if (err)
				return err;
			add_helper = true;

			/* helper installed, add seqadj if NAT is required */
			if (info->nat && !nfct_seqadj(ct)) {
				if (!nfct_seqadj_ext_add(ct))
					return -EINVAL;
			}
		}

		/* Call the helper only if:
		 * - nf_conntrack_in() was executed above ("!cached") or a
		 *   helper was just attached ("add_helper") for a confirmed
		 *   connection, or
		 * - When committing an unconfirmed connection.
		 */
		if ((nf_ct_is_confirmed(ct) ? !cached || add_helper :
					      info->commit) &&
		    nf_ct_helper(skb, ct, ctinfo, info->family) != NF_ACCEPT) {
			return -EINVAL;
		}

		if (nf_ct_protonum(ct) == IPPROTO_TCP &&
		    nf_ct_is_confirmed(ct) && nf_conntrack_tcp_established(ct)) {
			/* Be liberal for tcp packets so that out-of-window
			 * packets are not marked invalid.
			 */
			nf_ct_set_tcp_be_liberal(ct);
		}

		nf_conn_act_ct_ext_fill(skb, ct, ctinfo);
	}

	return 0;
}

/* Lookup connection and read fields into key. */
static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
			 const struct ovs_conntrack_info *info,
			 struct sk_buff *skb)
{
	struct nf_conn *ct;
	int err;

	err = __ovs_ct_lookup(net, key, info, skb);
	if (err)
		return err;

	ct = (struct nf_conn *)skb_nfct(skb);
	if (ct)
		nf_ct_deliver_cached_events(ct);

	return 0;
}

static bool labels_nonzero(const struct ovs_key_ct_labels *labels)
{
	size_t i;

	for (i = 0; i < OVS_CT_LABELS_LEN_32; i++)
		if (labels->ct_labels_32[i])
			return true;

	return false;
}

#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
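/* Zone limits live in a fixed-size hash table; the bucket is chosen by
 * masking the zone id with the (power-of-two) bucket count.
 */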
static struct hlist_head *ct_limit_hash_bucket(
	const struct ovs_ct_limit_info *info, u16 zone)
{
	return &info->limits[zone & (CT_LIMIT_HASH_BUCKETS - 1)];
}

/* Call with ovs_mutex */
static void ct_limit_set(const struct ovs_ct_limit_info *info,
			 struct ovs_ct_limit *new_ct_limit)
{
	struct ovs_ct_limit *ct_limit;
	struct hlist_head *head;

	head = ct_limit_hash_bucket(info, new_ct_limit->zone);
	hlist_for_each_entry_rcu(ct_limit, head, hlist_node) {
		if (ct_limit->zone == new_ct_limit->zone) {
			hlist_replace_rcu(&ct_limit->hlist_node,
					  &new_ct_limit->hlist_node);
			kfree_rcu(ct_limit, rcu);
			return;
		}
	}

	hlist_add_head_rcu(&new_ct_limit->hlist_node, head);
}

/* Call with ovs_mutex */
static void ct_limit_del(const struct ovs_ct_limit_info *info, u16 zone)
{
	struct ovs_ct_limit *ct_limit;
	struct hlist_head *head;
	struct hlist_node *n;

	head = ct_limit_hash_bucket(info, zone);
	hlist_for_each_entry_safe(ct_limit, n, head, hlist_node) {
		if (ct_limit->zone == zone) {
			hlist_del_rcu(&ct_limit->hlist_node);
			kfree_rcu(ct_limit, rcu);
			return;
		}
	}
}

/* Call with RCU read lock */
static u32 ct_limit_get(const struct ovs_ct_limit_info *info, u16 zone)
{
	struct ovs_ct_limit *ct_limit;
	struct hlist_head *head;

	head = ct_limit_hash_bucket(info, zone);
	hlist_for_each_entry_rcu(ct_limit, head, hlist_node) {
		if (ct_limit->zone == zone)
			return ct_limit->limit;
	}

	return info->default_limit;
}

static int ovs_ct_check_limit(struct net *net,
			      const struct ovs_conntrack_info *info,
			      const struct nf_conntrack_tuple *tuple)
{
	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
	const struct ovs_ct_limit_info *ct_limit_info = ovs_net->ct_limit_info;
	u32 per_zone_limit, connections;
	u32 conncount_key;

	conncount_key = info->zone.id;

	per_zone_limit = ct_limit_get(ct_limit_info, info->zone.id);
	if (per_zone_limit == OVS_CT_LIMIT_UNLIMITED)
		return 0;

	connections = nf_conncount_count(net, ct_limit_info->data,
					 &conncount_key, tuple, &info->zone);
	if (connections > per_zone_limit)
		return -ENOMEM;

	return 0;
}
#endif

/* Lookup connection and confirm if unconfirmed. */
static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
			 const struct ovs_conntrack_info *info,
			 struct sk_buff *skb)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;
	int err;

	err = __ovs_ct_lookup(net, key, info, skb);
	if (err)
		return err;

	/* The connection could be invalid, in which case this is a no-op. */
	ct = nf_ct_get(skb, &ctinfo);
	if (!ct)
		return 0;

#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
	if (static_branch_unlikely(&ovs_ct_limit_enabled)) {
		if (!nf_ct_is_confirmed(ct)) {
			err = ovs_ct_check_limit(net, info,
				&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
			if (err) {
				net_warn_ratelimited("openvswitch: zone: %u "
					"exceeds conntrack limit\n",
					info->zone.id);
				return err;
			}
		}
	}
#endif

	/* Set the conntrack event mask if given. NEW and DELETE events have
	 * their own groups, but the NFNLGRP_CONNTRACK_UPDATE group listener
	 * typically would receive many kinds of updates. Setting the event
	 * mask allows those events to be filtered. The set event mask will
	 * remain in effect for the lifetime of the connection unless changed
	 * by a further CT action with both the commit flag and the eventmask
	 * option.
	 */
	if (info->have_eventmask) {
		struct nf_conntrack_ecache *cache = nf_ct_ecache_find(ct);

		if (cache)
			cache->ctmask = info->eventmask;
	}

	/* Apply changes before confirming the connection so that the initial
	 * conntrack NEW netlink event carries the values given in the CT
	 * action.
	 */
	if (info->mark.mask) {
		err = ovs_ct_set_mark(ct, key, info->mark.value,
				      info->mark.mask);
		if (err)
			return err;
	}
	if (!nf_ct_is_confirmed(ct)) {
		err = ovs_ct_init_labels(ct, key, &info->labels.value,
					 &info->labels.mask);
		if (err)
			return err;

		nf_conn_act_ct_ext_add(skb, ct, ctinfo);
	} else if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
		   labels_nonzero(&info->labels.mask)) {
		err = ovs_ct_set_labels(ct, key, &info->labels.value,
					&info->labels.mask);
		if (err)
			return err;
	}
	/* This will take care of sending queued events even if the connection
	 * is already confirmed.
	 */
	if (nf_conntrack_confirm(skb) != NF_ACCEPT)
		return -EINVAL;

	return 0;
}

/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
 * value if 'skb' is freed.
 */
int ovs_ct_execute(struct net *net, struct sk_buff *skb,
		   struct sw_flow_key *key,
		   const struct ovs_conntrack_info *info)
{
	int nh_ofs;
	int err;

	/* The conntrack module expects to be working at L3. */
	nh_ofs = skb_network_offset(skb);
	skb_pull_rcsum(skb, nh_ofs);

	err = nf_ct_skb_network_trim(skb, info->family);
	if (err) {
		kfree_skb(skb);
		return err;
	}

	if (key->ip.frag != OVS_FRAG_TYPE_NONE) {
		err = ovs_ct_handle_fragments(net, key, info->zone.id,
					      info->family, skb);
		if (err)
			return err;
	}

	if (info->commit)
		err = ovs_ct_commit(net, key, info, skb);
	else
		err = ovs_ct_lookup(net, key, info, skb);

	skb_push_rcsum(skb, nh_ofs);
	if (err)
		ovs_kfree_skb_reason(skb, OVS_DROP_CONNTRACK);
	return err;
}

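/* Detach any conntrack entry from the skb and mark it untracked, then
 * refresh the CT fields in 'key' (if given) to match.
 */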
int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;

	ct = nf_ct_get(skb, &ctinfo);

	nf_ct_put(ct);
	nf_ct_set(skb, NULL, IP_CT_UNTRACKED);

	if (key)
		ovs_ct_fill_key(skb, key, false);

	return 0;
}

#if IS_ENABLED(CONFIG_NF_NAT)
static int parse_nat(const struct nlattr *attr,
		     struct ovs_conntrack_info *info, bool log)
{
	struct nlattr *a;
	int rem;
	bool have_ip_max = false;
	bool have_proto_max = false;
	bool ip_vers = (info->family == NFPROTO_IPV6);

	nla_for_each_nested(a, attr, rem) {
		static const int ovs_nat_attr_lens[OVS_NAT_ATTR_MAX + 1][2] = {
			[OVS_NAT_ATTR_SRC] = {0, 0},
			[OVS_NAT_ATTR_DST] = {0, 0},
			[OVS_NAT_ATTR_IP_MIN] = {sizeof(struct in_addr),
						 sizeof(struct in6_addr)},
			[OVS_NAT_ATTR_IP_MAX] = {sizeof(struct in_addr),
						 sizeof(struct in6_addr)},
			[OVS_NAT_ATTR_PROTO_MIN] = {sizeof(u16), sizeof(u16)},
			[OVS_NAT_ATTR_PROTO_MAX] = {sizeof(u16), sizeof(u16)},
			[OVS_NAT_ATTR_PERSISTENT] = {0, 0},
			[OVS_NAT_ATTR_PROTO_HASH] = {0, 0},
			[OVS_NAT_ATTR_PROTO_RANDOM] = {0, 0},
		};
		int type = nla_type(a);

		if (type > OVS_NAT_ATTR_MAX) {
			OVS_NLERR(log, "Unknown NAT attribute (type=%d, max=%d)",
				  type, OVS_NAT_ATTR_MAX);
			return -EINVAL;
		}

		if (nla_len(a) != ovs_nat_attr_lens[type][ip_vers]) {
			OVS_NLERR(log, "NAT attribute type %d has unexpected length (%d != %d)",
				  type, nla_len(a),
				  ovs_nat_attr_lens[type][ip_vers]);
			return -EINVAL;
		}

		switch (type) {
		case OVS_NAT_ATTR_SRC:
		case OVS_NAT_ATTR_DST:
			if (info->nat) {
				OVS_NLERR(log, "Only one type of NAT may be specified");
				return -ERANGE;
			}
			info->nat |= OVS_CT_NAT;
			info->nat |= ((type == OVS_NAT_ATTR_SRC)
					? OVS_CT_SRC_NAT : OVS_CT_DST_NAT);
			break;

		case OVS_NAT_ATTR_IP_MIN:
			nla_memcpy(&info->range.min_addr, a,
				   sizeof(info->range.min_addr));
			info->range.flags |= NF_NAT_RANGE_MAP_IPS;
			break;

		case OVS_NAT_ATTR_IP_MAX:
			have_ip_max = true;
			nla_memcpy(&info->range.max_addr, a,
				   sizeof(info->range.max_addr));
			info->range.flags |= NF_NAT_RANGE_MAP_IPS;
			break;

		case OVS_NAT_ATTR_PROTO_MIN:
			info->range.min_proto.all = htons(nla_get_u16(a));
			info->range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
			break;

		case OVS_NAT_ATTR_PROTO_MAX:
			have_proto_max = true;
			info->range.max_proto.all = htons(nla_get_u16(a));
			info->range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
			break;

		case OVS_NAT_ATTR_PERSISTENT:
			info->range.flags |= NF_NAT_RANGE_PERSISTENT;
			break;

		case OVS_NAT_ATTR_PROTO_HASH:
			info->range.flags |= NF_NAT_RANGE_PROTO_RANDOM;
			break;

		case OVS_NAT_ATTR_PROTO_RANDOM:
			info->range.flags |= NF_NAT_RANGE_PROTO_RANDOM_FULLY;
			break;

		default:
			OVS_NLERR(log, "Unknown nat attribute (%d)", type);
			return -EINVAL;
		}
	}

	if (rem > 0) {
		OVS_NLERR(log, "NAT attribute has %d unknown bytes", rem);
		return -EINVAL;
	}
	if (!info->nat) {
		/* Do not allow flags if no type is given. */
		if (info->range.flags) {
			OVS_NLERR(log,
				  "NAT flags may be given only when NAT range (SRC or DST) is also specified."
				  );
			return -EINVAL;
		}
		info->nat = OVS_CT_NAT;	/* NAT existing connections. */
	} else if (!info->commit) {
		OVS_NLERR(log,
			  "NAT attributes may be specified only when CT COMMIT flag is also specified."
			  );
		return -EINVAL;
	}
	/* Allow missing IP_MAX. */
	if (info->range.flags & NF_NAT_RANGE_MAP_IPS && !have_ip_max) {
		memcpy(&info->range.max_addr, &info->range.min_addr,
		       sizeof(info->range.max_addr));
	}
	/* Allow missing PROTO_MAX. */
	if (info->range.flags & NF_NAT_RANGE_PROTO_SPECIFIED &&
	    !have_proto_max) {
		info->range.max_proto.all = info->range.min_proto.all;
	}
	return 0;
}
#endif

static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = {
	[OVS_CT_ATTR_COMMIT]	= { .minlen = 0, .maxlen = 0 },
	[OVS_CT_ATTR_FORCE_COMMIT]	= { .minlen = 0, .maxlen = 0 },
	[OVS_CT_ATTR_ZONE]	= { .minlen = sizeof(u16),
				    .maxlen = sizeof(u16) },
	[OVS_CT_ATTR_MARK]	= { .minlen = sizeof(struct md_mark),
				    .maxlen = sizeof(struct md_mark) },
	[OVS_CT_ATTR_LABELS]	= { .minlen = sizeof(struct md_labels),
				    .maxlen = sizeof(struct md_labels) },
	[OVS_CT_ATTR_HELPER]	= { .minlen = 1,
				    .maxlen = NF_CT_HELPER_NAME_LEN },
#if IS_ENABLED(CONFIG_NF_NAT)
	/* NAT length is checked when parsing the nested attributes. */
	[OVS_CT_ATTR_NAT]	= { .minlen = 0, .maxlen = INT_MAX },
#endif
	[OVS_CT_ATTR_EVENTMASK]	= { .minlen = sizeof(u32),
				    .maxlen = sizeof(u32) },
	[OVS_CT_ATTR_TIMEOUT]	= { .minlen = 1,
				    .maxlen = CTNL_TIMEOUT_NAME_MAX },
};

static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
		    const char **helper, bool log)
{
	struct nlattr *a;
	int rem;

	nla_for_each_nested(a, attr, rem) {
		int type = nla_type(a);
		int maxlen;
		int minlen;

		if (type > OVS_CT_ATTR_MAX) {
			OVS_NLERR(log,
				  "Unknown conntrack attr (type=%d, max=%d)",
				  type, OVS_CT_ATTR_MAX);
			return -EINVAL;
		}

		maxlen = ovs_ct_attr_lens[type].maxlen;
		minlen = ovs_ct_attr_lens[type].minlen;
		if (nla_len(a) < minlen || nla_len(a) > maxlen) {
			OVS_NLERR(log,
				  "Conntrack attr type has unexpected length (type=%d, length=%d, expected=%d)",
				  type, nla_len(a), maxlen);
			return -EINVAL;
		}

		switch (type) {
		case OVS_CT_ATTR_FORCE_COMMIT:
			info->force = true;
			fallthrough;
		case OVS_CT_ATTR_COMMIT:
			info->commit = true;
			break;
#ifdef CONFIG_NF_CONNTRACK_ZONES
		case OVS_CT_ATTR_ZONE:
			info->zone.id = nla_get_u16(a);
			break;
#endif
#ifdef CONFIG_NF_CONNTRACK_MARK
		case OVS_CT_ATTR_MARK: {
			struct md_mark *mark = nla_data(a);

			if (!mark->mask) {
				OVS_NLERR(log, "ct_mark mask cannot be 0");
				return -EINVAL;
			}
			info->mark = *mark;
			break;
		}
#endif
#ifdef CONFIG_NF_CONNTRACK_LABELS
		case OVS_CT_ATTR_LABELS: {
			struct md_labels *labels = nla_data(a);

			if (!labels_nonzero(&labels->mask)) {
				OVS_NLERR(log, "ct_labels mask cannot be 0");
				return -EINVAL;
			}
			info->labels = *labels;
			break;
		}
#endif
		case OVS_CT_ATTR_HELPER:
			*helper = nla_data(a);
			if (!string_is_terminated(*helper, nla_len(a))) {
				OVS_NLERR(log, "Invalid conntrack helper");
				return -EINVAL;
			}
			break;
#if IS_ENABLED(CONFIG_NF_NAT)
		case OVS_CT_ATTR_NAT: {
			int err = parse_nat(a, info, log);

			if (err)
				return err;
			break;
		}
#endif
		case OVS_CT_ATTR_EVENTMASK:
			info->have_eventmask = true;
			info->eventmask = nla_get_u32(a);
			break;
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
		case OVS_CT_ATTR_TIMEOUT:
			memcpy(info->timeout, nla_data(a), nla_len(a));
			if (!string_is_terminated(info->timeout, nla_len(a))) {
				OVS_NLERR(log, "Invalid conntrack timeout");
				return -EINVAL;
			}
			break;
#endif

		default:
			OVS_NLERR(log, "Unknown conntrack attr (%d)",
				  type);
			return -EINVAL;
		}
	}

#ifdef CONFIG_NF_CONNTRACK_MARK
	if (!info->commit && info->mark.mask) {
		OVS_NLERR(log,
			  "Setting conntrack mark requires 'commit' flag.");
		return -EINVAL;
	}
#endif
#ifdef CONFIG_NF_CONNTRACK_LABELS
	if (!info->commit && labels_nonzero(&info->labels.mask)) {
		OVS_NLERR(log,
			  "Setting conntrack labels requires 'commit' flag.");
		return -EINVAL;
	}
#endif
	if (rem > 0) {
		OVS_NLERR(log, "Conntrack attr has %d unknown bytes", rem);
		return -EINVAL;
	}

	return 0;
}

bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr)
{
	if (attr == OVS_KEY_ATTR_CT_STATE)
		return true;
	if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
	    attr == OVS_KEY_ATTR_CT_ZONE)
		return true;
	if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
	    attr == OVS_KEY_ATTR_CT_MARK)
		return true;
	if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
	    attr == OVS_KEY_ATTR_CT_LABELS) {
		struct ovs_net *ovs_net = net_generic(net, ovs_net_id);

		return ovs_net->xt_label;
	}

	return false;
}

int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
		       const struct sw_flow_key *key,
		       struct sw_flow_actions **sfa, bool log)
{
	struct ovs_conntrack_info ct_info;
	const char *helper = NULL;
	u16 family;
	int err;

	family = key_to_nfproto(key);
	if (family == NFPROTO_UNSPEC) {
		OVS_NLERR(log, "ct family unspecified");
		return -EINVAL;
	}

	memset(&ct_info, 0, sizeof(ct_info));
	ct_info.family = family;

	nf_ct_zone_init(&ct_info.zone, NF_CT_DEFAULT_ZONE_ID,
			NF_CT_DEFAULT_ZONE_DIR, 0);

	err = parse_ct(attr, &ct_info, &helper, log);
	if (err)
		return err;

	/* Set up template for tracking connections in specific zones. */
	ct_info.ct = nf_ct_tmpl_alloc(net, &ct_info.zone, GFP_KERNEL);
	if (!ct_info.ct) {
		OVS_NLERR(log, "Failed to allocate conntrack template");
		return -ENOMEM;
	}

	if (ct_info.timeout[0]) {
		if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto,
				      ct_info.timeout))
			OVS_NLERR(log,
				  "Failed to associate timeout policy '%s'",
				  ct_info.timeout);
		else
			ct_info.nf_ct_timeout = rcu_dereference(
				nf_ct_timeout_find(ct_info.ct)->timeout);
	}

	if (helper) {
		err = nf_ct_add_helper(ct_info.ct, helper, ct_info.family,
				       key->ip.proto, ct_info.nat, &ct_info.helper);
		if (err) {
			OVS_NLERR(log, "Failed to add %s helper %d", helper, err);
			goto err_free_ct;
		}
	}

	err = ovs_nla_add_action(sfa, OVS_ACTION_ATTR_CT, &ct_info,
				 sizeof(ct_info), log);
	if (err)
		goto err_free_ct;

	if (ct_info.commit)
		__set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
	return 0;
err_free_ct:
	__ovs_ct_free_action(&ct_info);
	return err;
}

#if IS_ENABLED(CONFIG_NF_NAT)
static bool ovs_ct_nat_to_attr(const struct ovs_conntrack_info *info,
			       struct sk_buff *skb)
{
	struct nlattr *start;

	start = nla_nest_start_noflag(skb, OVS_CT_ATTR_NAT);
	if (!start)
		return false;

	if (info->nat & OVS_CT_SRC_NAT) {
		if (nla_put_flag(skb, OVS_NAT_ATTR_SRC))
			return false;
	} else if (info->nat & OVS_CT_DST_NAT) {
		if (nla_put_flag(skb, OVS_NAT_ATTR_DST))
			return false;
	} else {
		goto out;
	}

	if (info->range.flags & NF_NAT_RANGE_MAP_IPS) {
		if (IS_ENABLED(CONFIG_NF_NAT) &&
		    info->family == NFPROTO_IPV4) {
			if (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MIN,
					    info->range.min_addr.ip) ||
			    (info->range.max_addr.ip
			     != info->range.min_addr.ip &&
			     (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MAX,
					      info->range.max_addr.ip))))
				return false;
		} else if (IS_ENABLED(CONFIG_IPV6) &&
			   info->family == NFPROTO_IPV6) {
			if (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MIN,
					     &info->range.min_addr.in6) ||
			    (memcmp(&info->range.max_addr.in6,
				    &info->range.min_addr.in6,
				    sizeof(info->range.max_addr.in6)) &&
			     (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MAX,
					       &info->range.max_addr.in6))))
				return false;
		} else {
			return false;
		}
	}
	if (info->range.flags & NF_NAT_RANGE_PROTO_SPECIFIED &&
	    (nla_put_u16(skb, OVS_NAT_ATTR_PROTO_MIN,
			 ntohs(info->range.min_proto.all)) ||
	     (info->range.max_proto.all != info->range.min_proto.all &&
	      nla_put_u16(skb, OVS_NAT_ATTR_PROTO_MAX,
			  ntohs(info->range.max_proto.all)))))
		return false;

	if (info->range.flags & NF_NAT_RANGE_PERSISTENT &&
	    nla_put_flag(skb, OVS_NAT_ATTR_PERSISTENT))
		return false;
	if (info->range.flags & NF_NAT_RANGE_PROTO_RANDOM &&
	    nla_put_flag(skb, OVS_NAT_ATTR_PROTO_HASH))
		return false;
	if (info->range.flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY &&
	    nla_put_flag(skb, OVS_NAT_ATTR_PROTO_RANDOM))
		return false;
out:
	nla_nest_end(skb, start);

	return true;
}
#endif

int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info,
			  struct sk_buff *skb)
{
	struct nlattr *start;

	start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_CT);
	if (!start)
		return -EMSGSIZE;

	if (ct_info->commit && nla_put_flag(skb, ct_info->force
					    ? OVS_CT_ATTR_FORCE_COMMIT
					    : OVS_CT_ATTR_COMMIT))
		return -EMSGSIZE;
	if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
	    nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id))
		return -EMSGSIZE;
	if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && ct_info->mark.mask &&
	    nla_put(skb, OVS_CT_ATTR_MARK, sizeof(ct_info->mark),
		    &ct_info->mark))
		return -EMSGSIZE;
	if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
	    labels_nonzero(&ct_info->labels.mask) &&
	    nla_put(skb, OVS_CT_ATTR_LABELS, sizeof(ct_info->labels),
		    &ct_info->labels))
		return -EMSGSIZE;
	if (ct_info->helper) {
		if (nla_put_string(skb, OVS_CT_ATTR_HELPER,
				   ct_info->helper->name))
			return -EMSGSIZE;
	}
	if (ct_info->have_eventmask &&
	    nla_put_u32(skb, OVS_CT_ATTR_EVENTMASK, ct_info->eventmask))
		return -EMSGSIZE;
	if (ct_info->timeout[0]) {
		if (nla_put_string(skb, OVS_CT_ATTR_TIMEOUT, ct_info->timeout))
			return -EMSGSIZE;
	}

#if IS_ENABLED(CONFIG_NF_NAT)
	if (ct_info->nat && !ovs_ct_nat_to_attr(ct_info, skb))
		return -EMSGSIZE;
#endif
	nla_nest_end(skb, start);

	return 0;
}

void ovs_ct_free_action(const struct nlattr *a)
{
	struct ovs_conntrack_info *ct_info = nla_data(a);

	__ovs_ct_free_action(ct_info);
}

static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
{
	if (ct_info->helper) {
#if IS_ENABLED(CONFIG_NF_NAT)
		if (ct_info->nat)
			nf_nat_helper_put(ct_info->helper);
#endif
		nf_conntrack_helper_put(ct_info->helper);
	}
	if (ct_info->ct) {
		if (ct_info->timeout[0])
			nf_ct_destroy_timeout(ct_info->ct);
		nf_ct_tmpl_free(ct_info->ct);
	}
}

#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
static int ovs_ct_limit_init(struct net *net, struct ovs_net *ovs_net)
{
	int i, err;

	ovs_net->ct_limit_info = kmalloc(sizeof(*ovs_net->ct_limit_info),
					 GFP_KERNEL);
	if (!ovs_net->ct_limit_info)
		return -ENOMEM;

	ovs_net->ct_limit_info->default_limit = OVS_CT_LIMIT_DEFAULT;
	ovs_net->ct_limit_info->limits =
		kmalloc_array(CT_LIMIT_HASH_BUCKETS, sizeof(struct hlist_head),
			      GFP_KERNEL);
	if (!ovs_net->ct_limit_info->limits) {
		kfree(ovs_net->ct_limit_info);
		return -ENOMEM;
	}

	for (i = 0; i < CT_LIMIT_HASH_BUCKETS; i++)
		INIT_HLIST_HEAD(&ovs_net->ct_limit_info->limits[i]);

	ovs_net->ct_limit_info->data =
		nf_conncount_init(net, NFPROTO_INET, sizeof(u32));

	if (IS_ERR(ovs_net->ct_limit_info->data)) {
		err = PTR_ERR(ovs_net->ct_limit_info->data);
		kfree(ovs_net->ct_limit_info->limits);
		kfree(ovs_net->ct_limit_info);
		pr_err("openvswitch: failed to init nf_conncount %d\n", err);
		return err;
	}
	return 0;
}

static void ovs_ct_limit_exit(struct net *net, struct ovs_net *ovs_net)
{
	const struct ovs_ct_limit_info *info = ovs_net->ct_limit_info;
	int i;

	nf_conncount_destroy(net, NFPROTO_INET, info->data);
	for (i = 0; i < CT_LIMIT_HASH_BUCKETS; ++i) {
		struct hlist_head *head = &info->limits[i];
		struct ovs_ct_limit *ct_limit;
		struct hlist_node *next;

		hlist_for_each_entry_safe(ct_limit, next, head, hlist_node)
			kfree_rcu(ct_limit, rcu);
	}
	kfree(info->limits);
	kfree(info);
}

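/* Allocate a generic netlink reply and write the family header; on
 * success '*ovs_reply_header' points at the reply's ovs_header so the
 * caller can finish and send the message.
 */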
static struct sk_buff *
ovs_ct_limit_cmd_reply_start(struct genl_info *info, u8 cmd,
			     struct ovs_header **ovs_reply_header)
{
	struct ovs_header *ovs_header = genl_info_userhdr(info);
	struct sk_buff *skb;

	skb = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!skb)
		return ERR_PTR(-ENOMEM);

	*ovs_reply_header = genlmsg_put(skb, info->snd_portid,
					info->snd_seq,
					&dp_ct_limit_genl_family, 0, cmd);

	if (!*ovs_reply_header) {
		nlmsg_free(skb);
		return ERR_PTR(-EMSGSIZE);
	}
	(*ovs_reply_header)->dp_ifindex = ovs_header->dp_ifindex;

	return skb;
}

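/* Validate a user-supplied zone id and narrow it to u16. */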
static bool check_zone_id(int zone_id, u16 *pzone)
{
	if (zone_id >= 0 && zone_id <= 65535) {
		*pzone = (u16)zone_id;
		return true;
	}
	return false;
}

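/* The OVS_CT_LIMIT_ATTR_ZONE_LIMIT payload is treated here as a flat,
 * NLA_ALIGN'ed array of struct ovs_zone_limit entries rather than
 * nested netlink attributes, so it is walked manually entry by entry.
 */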
static int ovs_ct_limit_set_zone_limit(struct nlattr *nla_zone_limit,
				       struct ovs_ct_limit_info *info)
{
	struct ovs_zone_limit *zone_limit;
	int rem;
	u16 zone;

	rem = NLA_ALIGN(nla_len(nla_zone_limit));
	zone_limit = (struct ovs_zone_limit *)nla_data(nla_zone_limit);

	while (rem >= sizeof(*zone_limit)) {
		if (unlikely(zone_limit->zone_id ==
				OVS_ZONE_LIMIT_DEFAULT_ZONE)) {
			ovs_lock();
			info->default_limit = zone_limit->limit;
			ovs_unlock();
		} else if (unlikely(!check_zone_id(
				zone_limit->zone_id, &zone))) {
			OVS_NLERR(true, "zone id is out of range");
		} else {
			struct ovs_ct_limit *ct_limit;

			ct_limit = kmalloc(sizeof(*ct_limit),
					   GFP_KERNEL_ACCOUNT);
			if (!ct_limit)
				return -ENOMEM;

			ct_limit->zone = zone;
			ct_limit->limit = zone_limit->limit;

			ovs_lock();
			ct_limit_set(info, ct_limit);
			ovs_unlock();
		}
		rem -= NLA_ALIGN(sizeof(*zone_limit));
		zone_limit = (struct ovs_zone_limit *)((u8 *)zone_limit +
				NLA_ALIGN(sizeof(*zone_limit)));
	}

	if (rem)
		OVS_NLERR(true, "set zone limit has %d unknown bytes", rem);

	return 0;
}

static int ovs_ct_limit_del_zone_limit(struct nlattr *nla_zone_limit,
				       struct ovs_ct_limit_info *info)
{
	struct ovs_zone_limit *zone_limit;
	int rem;
	u16 zone;

	rem = NLA_ALIGN(nla_len(nla_zone_limit));
	zone_limit = (struct ovs_zone_limit *)nla_data(nla_zone_limit);

	while (rem >= sizeof(*zone_limit)) {
		if (unlikely(zone_limit->zone_id ==
				OVS_ZONE_LIMIT_DEFAULT_ZONE)) {
			ovs_lock();
			info->default_limit = OVS_CT_LIMIT_DEFAULT;
			ovs_unlock();
		} else if (unlikely(!check_zone_id(
				zone_limit->zone_id, &zone))) {
			OVS_NLERR(true, "zone id is out of range");
		} else {
			ovs_lock();
			ct_limit_del(info, zone);
			ovs_unlock();
		}
		rem -= NLA_ALIGN(sizeof(*zone_limit));
		zone_limit = (struct ovs_zone_limit *)((u8 *)zone_limit +
				NLA_ALIGN(sizeof(*zone_limit)));
	}

	if (rem)
		OVS_NLERR(true, "del zone limit has %d unknown bytes", rem);

	return 0;
}

static int ovs_ct_limit_get_default_limit(struct ovs_ct_limit_info *info,
					  struct sk_buff *reply)
{
	struct ovs_zone_limit zone_limit = {
		.zone_id = OVS_ZONE_LIMIT_DEFAULT_ZONE,
		.limit = info->default_limit,
	};

	return nla_put_nohdr(reply, sizeof(zone_limit), &zone_limit);
}

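/* Report one zone: its configured limit plus the current connection
 * count as seen by nf_conncount for that zone.
 */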
static int __ovs_ct_limit_get_zone_limit(struct net *net,
					 struct nf_conncount_data *data,
					 u16 zone_id, u32 limit,
					 struct sk_buff *reply)
{
	struct nf_conntrack_zone ct_zone;
	struct ovs_zone_limit zone_limit;
	u32 conncount_key = zone_id;

	zone_limit.zone_id = zone_id;
	zone_limit.limit = limit;
	nf_ct_zone_init(&ct_zone, zone_id, NF_CT_DEFAULT_ZONE_DIR, 0);

	zone_limit.count = nf_conncount_count(net, data, &conncount_key, NULL,
					      &ct_zone);
	return nla_put_nohdr(reply, sizeof(zone_limit), &zone_limit);
}

static int ovs_ct_limit_get_zone_limit(struct net *net,
				       struct nlattr *nla_zone_limit,
				       struct ovs_ct_limit_info *info,
				       struct sk_buff *reply)
{
	struct ovs_zone_limit *zone_limit;
	int rem, err;
	u32 limit;
	u16 zone;

	rem = NLA_ALIGN(nla_len(nla_zone_limit));
	zone_limit = (struct ovs_zone_limit *)nla_data(nla_zone_limit);

	while (rem >= sizeof(*zone_limit)) {
		if (unlikely(zone_limit->zone_id ==
				OVS_ZONE_LIMIT_DEFAULT_ZONE)) {
			err = ovs_ct_limit_get_default_limit(info, reply);
			if (err)
				return err;
		} else if (unlikely(!check_zone_id(zone_limit->zone_id,
						   &zone))) {
			OVS_NLERR(true, "zone id is out of range");
		} else {
			rcu_read_lock();
			limit = ct_limit_get(info, zone);
			rcu_read_unlock();

			err = __ovs_ct_limit_get_zone_limit(
				net, info->data, zone, limit, reply);
			if (err)
				return err;
		}
		rem -= NLA_ALIGN(sizeof(*zone_limit));
		zone_limit = (struct ovs_zone_limit *)((u8 *)zone_limit +
				NLA_ALIGN(sizeof(*zone_limit)));
	}

	if (rem)
		OVS_NLERR(true, "get zone limit has %d unknown bytes", rem);

	return 0;
}

static int ovs_ct_limit_get_all_zone_limit(struct net *net,
					   struct ovs_ct_limit_info *info,
					   struct sk_buff *reply)
{
	struct ovs_ct_limit *ct_limit;
	struct hlist_head *head;
	int i, err = 0;

	err = ovs_ct_limit_get_default_limit(info, reply);
	if (err)
		return err;

	rcu_read_lock();
	for (i = 0; i < CT_LIMIT_HASH_BUCKETS; ++i) {
		head = &info->limits[i];
		hlist_for_each_entry_rcu(ct_limit, head, hlist_node) {
			err = __ovs_ct_limit_get_zone_limit(net, info->data,
				ct_limit->zone, ct_limit->limit, reply);
			if (err)
				goto exit_err;
		}
	}

exit_err:
	rcu_read_unlock();
	return err;
}

static int ovs_ct_limit_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct sk_buff *reply;
	struct ovs_header *ovs_reply_header;
	struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
	struct ovs_ct_limit_info *ct_limit_info = ovs_net->ct_limit_info;
	int err;

	reply = ovs_ct_limit_cmd_reply_start(info, OVS_CT_LIMIT_CMD_SET,
					     &ovs_reply_header);
	if (IS_ERR(reply))
		return PTR_ERR(reply);

	if (!a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]) {
		err = -EINVAL;
		goto exit_err;
	}

	err = ovs_ct_limit_set_zone_limit(a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT],
					  ct_limit_info);
	if (err)
		goto exit_err;

	static_branch_enable(&ovs_ct_limit_enabled);

	genlmsg_end(reply, ovs_reply_header);
	return genlmsg_reply(reply, info);

exit_err:
	nlmsg_free(reply);
	return err;
}

static int ovs_ct_limit_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct sk_buff *reply;
	struct ovs_header *ovs_reply_header;
	struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
	struct ovs_ct_limit_info *ct_limit_info = ovs_net->ct_limit_info;
	int err;

	reply = ovs_ct_limit_cmd_reply_start(info, OVS_CT_LIMIT_CMD_DEL,
					     &ovs_reply_header);
	if (IS_ERR(reply))
		return PTR_ERR(reply);

	if (!a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]) {
		err = -EINVAL;
		goto exit_err;
	}

	err = ovs_ct_limit_del_zone_limit(a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT],
					  ct_limit_info);
	if (err)
		goto exit_err;

	genlmsg_end(reply, ovs_reply_header);
	return genlmsg_reply(reply, info);

exit_err:
	nlmsg_free(reply);
	return err;
}

static int ovs_ct_limit_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct nlattr *nla_reply;
	struct sk_buff *reply;
	struct ovs_header *ovs_reply_header;
	struct net *net = sock_net(skb->sk);
	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
	struct ovs_ct_limit_info *ct_limit_info = ovs_net->ct_limit_info;
	int err;

	reply = ovs_ct_limit_cmd_reply_start(info, OVS_CT_LIMIT_CMD_GET,
					     &ovs_reply_header);
	if (IS_ERR(reply))
		return PTR_ERR(reply);

	nla_reply = nla_nest_start_noflag(reply, OVS_CT_LIMIT_ATTR_ZONE_LIMIT);
	if (!nla_reply) {
		err = -EMSGSIZE;
		goto exit_err;
	}

	if (a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]) {
		err = ovs_ct_limit_get_zone_limit(
			net, a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT], ct_limit_info,
			reply);
		if (err)
			goto exit_err;
	} else {
		err = ovs_ct_limit_get_all_zone_limit(net, ct_limit_info,
						      reply);
		if (err)
			goto exit_err;
	}

	nla_nest_end(reply, nla_reply);
	genlmsg_end(reply, ovs_reply_header);
	return genlmsg_reply(reply, info);

exit_err:
	nlmsg_free(reply);
	return err;
}

static const struct genl_small_ops ct_limit_genl_ops[] = {
	{ .cmd = OVS_CT_LIMIT_CMD_SET,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN
					       * privilege.
					       */
		.doit = ovs_ct_limit_cmd_set,
	},
	{ .cmd = OVS_CT_LIMIT_CMD_DEL,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN
					       * privilege.
					       */
		.doit = ovs_ct_limit_cmd_del,
	},
	{ .cmd = OVS_CT_LIMIT_CMD_GET,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = 0,		/* OK for unprivileged users. */
		.doit = ovs_ct_limit_cmd_get,
	},
};

static const struct genl_multicast_group ovs_ct_limit_multicast_group = {
	.name = OVS_CT_LIMIT_MCGROUP,
};

struct genl_family dp_ct_limit_genl_family __ro_after_init = {
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_CT_LIMIT_FAMILY,
	.version = OVS_CT_LIMIT_VERSION,
	.maxattr = OVS_CT_LIMIT_ATTR_MAX,
	.policy = ct_limit_policy,
	.netnsok = true,
	.parallel_ops = true,
	.small_ops = ct_limit_genl_ops,
	.n_small_ops = ARRAY_SIZE(ct_limit_genl_ops),
	.resv_start_op = OVS_CT_LIMIT_CMD_GET + 1,
	.mcgrps = &ovs_ct_limit_multicast_group,
	.n_mcgrps = 1,
	.module = THIS_MODULE,
};
#endif

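/* Per-netns init: reserve the full 128-bit connlabel space (bit
 * 'n_bits - 1' is the highest label bit used) and, when conncount is
 * available, set up the zone limit machinery.
 */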
int ovs_ct_init(struct net *net)
{
	unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);

	if (nf_connlabels_get(net, n_bits - 1)) {
		ovs_net->xt_label = false;
		OVS_NLERR(true, "Failed to set connlabel length");
	} else {
		ovs_net->xt_label = true;
	}

#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
	return ovs_ct_limit_init(net, ovs_net);
#else
	return 0;
#endif
}

void ovs_ct_exit(struct net *net)
{
	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);

#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
	ovs_ct_limit_exit(net, ovs_net);
#endif

	if (ovs_net->xt_label)
		nf_connlabels_put(net);
}