1b4c2b959SKumar Kartikeya Dwivedi // SPDX-License-Identifier: GPL-2.0-only
2b4c2b959SKumar Kartikeya Dwivedi /* Unstable Conntrack Helpers for XDP and TC-BPF hook
3b4c2b959SKumar Kartikeya Dwivedi  *
4b4c2b959SKumar Kartikeya Dwivedi  * These are called from the XDP and SCHED_CLS BPF programs. Note that it is
5b4c2b959SKumar Kartikeya Dwivedi  * allowed to break compatibility for these functions since the interface they
6b4c2b959SKumar Kartikeya Dwivedi  * are exposed through to BPF programs is explicitly unstable.
7b4c2b959SKumar Kartikeya Dwivedi  */
8b4c2b959SKumar Kartikeya Dwivedi 
9864b656fSDaniel Xu #include <linux/bpf_verifier.h>
10b4c2b959SKumar Kartikeya Dwivedi #include <linux/bpf.h>
11b4c2b959SKumar Kartikeya Dwivedi #include <linux/btf.h>
12fdf21497SDaniel Xu #include <linux/filter.h>
13864b656fSDaniel Xu #include <linux/mutex.h>
14b4c2b959SKumar Kartikeya Dwivedi #include <linux/types.h>
15b4c2b959SKumar Kartikeya Dwivedi #include <linux/btf_ids.h>
16b4c2b959SKumar Kartikeya Dwivedi #include <linux/net_namespace.h>
17680ee045SJakub Kicinski #include <net/xdp.h>
180b206c6dSKumar Kartikeya Dwivedi #include <net/netfilter/nf_conntrack_bpf.h>
19b4c2b959SKumar Kartikeya Dwivedi #include <net/netfilter/nf_conntrack_core.h>
20b4c2b959SKumar Kartikeya Dwivedi 
21b4c2b959SKumar Kartikeya Dwivedi /* bpf_ct_opts - Options for CT lookup helpers
22b4c2b959SKumar Kartikeya Dwivedi  *
23b4c2b959SKumar Kartikeya Dwivedi  * Members:
24b4c2b959SKumar Kartikeya Dwivedi  * @netns_id   - Specify the network namespace for lookup
25b4c2b959SKumar Kartikeya Dwivedi  *		 Values:
26b4c2b959SKumar Kartikeya Dwivedi  *		   BPF_F_CURRENT_NETNS (-1)
27b4c2b959SKumar Kartikeya Dwivedi  *		     Use namespace associated with ctx (xdp_md, __sk_buff)
28b4c2b959SKumar Kartikeya Dwivedi  *		   [0, S32_MAX]
29b4c2b959SKumar Kartikeya Dwivedi  *		     Network Namespace ID
30b4c2b959SKumar Kartikeya Dwivedi  * @error      - Out parameter, set for any errors encountered
31b4c2b959SKumar Kartikeya Dwivedi  *		 Values:
32b4c2b959SKumar Kartikeya Dwivedi  *		   -EINVAL - Passed NULL for bpf_tuple pointer
33b4c2b959SKumar Kartikeya Dwivedi  *		   -EINVAL - opts->reserved is not 0
34b4c2b959SKumar Kartikeya Dwivedi  *		   -EINVAL - netns_id is less than -1
35b4c2b959SKumar Kartikeya Dwivedi  *		   -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12)
36b4c2b959SKumar Kartikeya Dwivedi  *		   -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
37b4c2b959SKumar Kartikeya Dwivedi  *		   -ENONET - No network namespace found for netns_id
38b4c2b959SKumar Kartikeya Dwivedi  *		   -ENOENT - Conntrack lookup could not find entry for tuple
39b4c2b959SKumar Kartikeya Dwivedi  *		   -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4)
40b4c2b959SKumar Kartikeya Dwivedi  *				   or sizeof(tuple->ipv6)
41b4c2b959SKumar Kartikeya Dwivedi  * @l4proto    - Layer 4 protocol
42b4c2b959SKumar Kartikeya Dwivedi  *		 Values:
43b4c2b959SKumar Kartikeya Dwivedi  *		   IPPROTO_TCP, IPPROTO_UDP
 * @dir        - Connection tracking tuple direction, set by the lookup
 *		 helpers to the direction of the tuple that matched
45b4c2b959SKumar Kartikeya Dwivedi  * @reserved   - Reserved member, will be reused for more options in future
46b4c2b959SKumar Kartikeya Dwivedi  *		 Values:
47b4c2b959SKumar Kartikeya Dwivedi  *		   0
48b4c2b959SKumar Kartikeya Dwivedi  */
49b4c2b959SKumar Kartikeya Dwivedi struct bpf_ct_opts {
50b4c2b959SKumar Kartikeya Dwivedi 	s32 netns_id;
51b4c2b959SKumar Kartikeya Dwivedi 	s32 error;
52b4c2b959SKumar Kartikeya Dwivedi 	u8 l4proto;
531963c740SLorenzo Bianconi 	u8 dir;
541963c740SLorenzo Bianconi 	u8 reserved[2];
55b4c2b959SKumar Kartikeya Dwivedi };
56b4c2b959SKumar Kartikeya Dwivedi 
57b4c2b959SKumar Kartikeya Dwivedi enum {
58b4c2b959SKumar Kartikeya Dwivedi 	NF_BPF_CT_OPTS_SZ = 12,
59b4c2b959SKumar Kartikeya Dwivedi };
60b4c2b959SKumar Kartikeya Dwivedi 
/* bpf_nf_ct_tuple_parse - Fill a conntrack tuple from a BPF socket tuple
 *
 * Parameters:
 * @bpf_tuple	- Source tuple supplied by the BPF program
 * @tuple_len	- Length of @bpf_tuple; selects the IPv4 or IPv6 layout
 * @protonum	- Layer 4 protocol, IPPROTO_TCP or IPPROTO_UDP
 * @dir		- When non-zero, the source and destination of @bpf_tuple are
 *		  swapped while filling @tuple
 * @tuple	- Destination; zeroed before being filled
 *
 * Return: 0 on success, -EPROTO for an unsupported @protonum (in which case
 * @tuple is left untouched), -EAFNOSUPPORT when @tuple_len matches neither
 * address family layout (in which case @tuple has already been zeroed).
 */
static int bpf_nf_ct_tuple_parse(struct bpf_sock_tuple *bpf_tuple,
				 u32 tuple_len, u8 protonum, u8 dir,
				 struct nf_conntrack_tuple *tuple)
{
	union nf_conntrack_man_proto *from_port, *to_port;
	union nf_inet_addr *from_addr, *to_addr;

	if (unlikely(protonum != IPPROTO_TCP && protonum != IPPROTO_UDP))
		return -EPROTO;

	memset(tuple, 0, sizeof(*tuple));

	/* Pick which half of the conntrack tuple receives the BPF tuple's
	 * source and which its destination. dst.u is not declared as
	 * union nf_conntrack_man_proto, hence the void casts.
	 */
	if (dir) {
		from_addr = &tuple->dst.u3;
		to_addr = &tuple->src.u3;
		from_port = (void *)&tuple->dst.u;
		to_port = &tuple->src.u;
	} else {
		from_addr = &tuple->src.u3;
		to_addr = &tuple->dst.u3;
		from_port = &tuple->src.u;
		to_port = (void *)&tuple->dst.u;
	}

	if (tuple_len == sizeof(bpf_tuple->ipv4)) {
		tuple->src.l3num = AF_INET;
		from_addr->ip = bpf_tuple->ipv4.saddr;
		from_port->tcp.port = bpf_tuple->ipv4.sport;
		to_addr->ip = bpf_tuple->ipv4.daddr;
		to_port->tcp.port = bpf_tuple->ipv4.dport;
	} else if (tuple_len == sizeof(bpf_tuple->ipv6)) {
		tuple->src.l3num = AF_INET6;
		memcpy(from_addr->ip6, bpf_tuple->ipv6.saddr,
		       sizeof(bpf_tuple->ipv6.saddr));
		from_port->tcp.port = bpf_tuple->ipv6.sport;
		memcpy(to_addr->ip6, bpf_tuple->ipv6.daddr,
		       sizeof(bpf_tuple->ipv6.daddr));
		to_port->tcp.port = bpf_tuple->ipv6.dport;
	} else {
		return -EAFNOSUPPORT;
	}

	tuple->dst.protonum = protonum;
	tuple->dst.dir = dir;

	return 0;
}
100d7e79c97SLorenzo Bianconi 
101d7e79c97SLorenzo Bianconi static struct nf_conn *
__bpf_nf_ct_alloc_entry(struct net * net,struct bpf_sock_tuple * bpf_tuple,u32 tuple_len,struct bpf_ct_opts * opts,u32 opts_len,u32 timeout)102d7e79c97SLorenzo Bianconi __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
103d7e79c97SLorenzo Bianconi 			u32 tuple_len, struct bpf_ct_opts *opts, u32 opts_len,
104d7e79c97SLorenzo Bianconi 			u32 timeout)
105d7e79c97SLorenzo Bianconi {
106d7e79c97SLorenzo Bianconi 	struct nf_conntrack_tuple otuple, rtuple;
107d7e79c97SLorenzo Bianconi 	struct nf_conn *ct;
108d7e79c97SLorenzo Bianconi 	int err;
109d7e79c97SLorenzo Bianconi 
110d7e79c97SLorenzo Bianconi 	if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
111d7e79c97SLorenzo Bianconi 	    opts_len != NF_BPF_CT_OPTS_SZ)
112d7e79c97SLorenzo Bianconi 		return ERR_PTR(-EINVAL);
113d7e79c97SLorenzo Bianconi 
114d7e79c97SLorenzo Bianconi 	if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
115d7e79c97SLorenzo Bianconi 		return ERR_PTR(-EINVAL);
116d7e79c97SLorenzo Bianconi 
117d7e79c97SLorenzo Bianconi 	err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
118d7e79c97SLorenzo Bianconi 				    IP_CT_DIR_ORIGINAL, &otuple);
119d7e79c97SLorenzo Bianconi 	if (err < 0)
120d7e79c97SLorenzo Bianconi 		return ERR_PTR(err);
121d7e79c97SLorenzo Bianconi 
122d7e79c97SLorenzo Bianconi 	err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
123d7e79c97SLorenzo Bianconi 				    IP_CT_DIR_REPLY, &rtuple);
124d7e79c97SLorenzo Bianconi 	if (err < 0)
125d7e79c97SLorenzo Bianconi 		return ERR_PTR(err);
126d7e79c97SLorenzo Bianconi 
127d7e79c97SLorenzo Bianconi 	if (opts->netns_id >= 0) {
128d7e79c97SLorenzo Bianconi 		net = get_net_ns_by_id(net, opts->netns_id);
129d7e79c97SLorenzo Bianconi 		if (unlikely(!net))
130d7e79c97SLorenzo Bianconi 			return ERR_PTR(-ENONET);
131d7e79c97SLorenzo Bianconi 	}
132d7e79c97SLorenzo Bianconi 
133d7e79c97SLorenzo Bianconi 	ct = nf_conntrack_alloc(net, &nf_ct_zone_dflt, &otuple, &rtuple,
134d7e79c97SLorenzo Bianconi 				GFP_ATOMIC);
135d7e79c97SLorenzo Bianconi 	if (IS_ERR(ct))
136d7e79c97SLorenzo Bianconi 		goto out;
137d7e79c97SLorenzo Bianconi 
138d7e79c97SLorenzo Bianconi 	memset(&ct->proto, 0, sizeof(ct->proto));
139d7e79c97SLorenzo Bianconi 	__nf_ct_set_timeout(ct, timeout * HZ);
140d7e79c97SLorenzo Bianconi 
141d7e79c97SLorenzo Bianconi out:
142d7e79c97SLorenzo Bianconi 	if (opts->netns_id >= 0)
143d7e79c97SLorenzo Bianconi 		put_net(net);
144d7e79c97SLorenzo Bianconi 
145d7e79c97SLorenzo Bianconi 	return ct;
146d7e79c97SLorenzo Bianconi }
147d7e79c97SLorenzo Bianconi 
/* __bpf_nf_ct_lookup - Look up the conntrack entry matching a BPF tuple
 *
 * Parameters:
 * @net		- Network namespace of the caller; used as is when
 *		  opts->netns_id is BPF_F_CURRENT_NETNS, otherwise as the
 *		  starting point for resolving opts->netns_id
 * @bpf_tuple	- Tuple to look up, must not be NULL
 * @tuple_len	- Length of @bpf_tuple
 * @opts	- Options, must not be NULL and must have reserved[] zeroed;
 *		  opts->dir is written on success
 * @opts_len	- Length of @opts, must be NF_BPF_CT_OPTS_SZ
 *
 * Return: the nf_conn returned by nf_conntrack_find_get(), or an ERR_PTR()
 * carrying -EINVAL, -EPROTO, -ENONET, -ENOENT or an error from
 * bpf_nf_ct_tuple_parse().
 */
static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
					  struct bpf_sock_tuple *bpf_tuple,
					  u32 tuple_len, struct bpf_ct_opts *opts,
					  u32 opts_len)
{
	struct nf_conntrack_tuple_hash *hash;
	struct nf_conntrack_tuple tuple;
	struct nf_conn *ct;
	s32 netns_id;
	int err;

	if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
	    opts_len != NF_BPF_CT_OPTS_SZ)
		return ERR_PTR(-EINVAL);
	if (unlikely(opts->l4proto != IPPROTO_TCP && opts->l4proto != IPPROTO_UDP))
		return ERR_PTR(-EPROTO);

	/* @opts is memory owned by the caller. Read netns_id exactly once so
	 * that the range check, the get_net_ns_by_id() decision and the
	 * put_net() decision below are all taken on the same value; re-reading
	 * it could unbalance the namespace reference if the memory changed in
	 * between.
	 */
	netns_id = READ_ONCE(opts->netns_id);
	if (unlikely(netns_id < BPF_F_CURRENT_NETNS))
		return ERR_PTR(-EINVAL);

	err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
				    IP_CT_DIR_ORIGINAL, &tuple);
	if (err < 0)
		return ERR_PTR(err);

	if (netns_id >= 0) {
		net = get_net_ns_by_id(net, netns_id);
		if (unlikely(!net))
			return ERR_PTR(-ENONET);
	}

	hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple);
	/* Drop the reference taken by get_net_ns_by_id(), if any. */
	if (netns_id >= 0)
		put_net(net);
	if (!hash)
		return ERR_PTR(-ENOENT);

	ct = nf_ct_tuplehash_to_ctrack(hash);
	/* Tell the caller which direction of the entry the tuple matched. */
	opts->dir = NF_CT_DIRECTION(hash);

	return ct;
}
188b4c2b959SKumar Kartikeya Dwivedi 
189864b656fSDaniel Xu BTF_ID_LIST(btf_nf_conn_ids)
BTF_ID(struct,nf_conn)190864b656fSDaniel Xu BTF_ID(struct, nf_conn)
191864b656fSDaniel Xu BTF_ID(struct, nf_conn___init)
192864b656fSDaniel Xu 
193864b656fSDaniel Xu /* Check writes into `struct nf_conn` */
194864b656fSDaniel Xu static int _nf_conntrack_btf_struct_access(struct bpf_verifier_log *log,
1956728aea7SKumar Kartikeya Dwivedi 					   const struct bpf_reg_state *reg,
196b7e852a9SAlexei Starovoitov 					   int off, int size)
197864b656fSDaniel Xu {
1986728aea7SKumar Kartikeya Dwivedi 	const struct btf_type *ncit, *nct, *t;
199864b656fSDaniel Xu 	size_t end;
200864b656fSDaniel Xu 
2016728aea7SKumar Kartikeya Dwivedi 	ncit = btf_type_by_id(reg->btf, btf_nf_conn_ids[1]);
2026728aea7SKumar Kartikeya Dwivedi 	nct = btf_type_by_id(reg->btf, btf_nf_conn_ids[0]);
2036728aea7SKumar Kartikeya Dwivedi 	t = btf_type_by_id(reg->btf, reg->btf_id);
204864b656fSDaniel Xu 	if (t != nct && t != ncit) {
205864b656fSDaniel Xu 		bpf_log(log, "only read is supported\n");
206864b656fSDaniel Xu 		return -EACCES;
207864b656fSDaniel Xu 	}
208864b656fSDaniel Xu 
209864b656fSDaniel Xu 	/* `struct nf_conn` and `struct nf_conn___init` have the same layout
210864b656fSDaniel Xu 	 * so we are safe to simply merge offset checks here
211864b656fSDaniel Xu 	 */
212864b656fSDaniel Xu 	switch (off) {
213864b656fSDaniel Xu #if defined(CONFIG_NF_CONNTRACK_MARK)
214864b656fSDaniel Xu 	case offsetof(struct nf_conn, mark):
215864b656fSDaniel Xu 		end = offsetofend(struct nf_conn, mark);
216864b656fSDaniel Xu 		break;
217864b656fSDaniel Xu #endif
218864b656fSDaniel Xu 	default:
219864b656fSDaniel Xu 		bpf_log(log, "no write support to nf_conn at off %d\n", off);
220864b656fSDaniel Xu 		return -EACCES;
221864b656fSDaniel Xu 	}
222864b656fSDaniel Xu 
223864b656fSDaniel Xu 	if (off + size > end) {
224864b656fSDaniel Xu 		bpf_log(log,
225864b656fSDaniel Xu 			"write access at off %d with size %d beyond the member of nf_conn ended at %zu\n",
226864b656fSDaniel Xu 			off, size, end);
227864b656fSDaniel Xu 		return -EACCES;
228864b656fSDaniel Xu 	}
229864b656fSDaniel Xu 
230864b656fSDaniel Xu 	return 0;
231864b656fSDaniel Xu }
232864b656fSDaniel Xu 
233b4c2b959SKumar Kartikeya Dwivedi __diag_push();
2340b206c6dSKumar Kartikeya Dwivedi __diag_ignore_all("-Wmissing-prototypes",
235b4c2b959SKumar Kartikeya Dwivedi 		  "Global functions as their definitions will be in nf_conntrack BTF");
236b4c2b959SKumar Kartikeya Dwivedi 
237d7e79c97SLorenzo Bianconi /* bpf_xdp_ct_alloc - Allocate a new CT entry
238d7e79c97SLorenzo Bianconi  *
239d7e79c97SLorenzo Bianconi  * Parameters:
240d7e79c97SLorenzo Bianconi  * @xdp_ctx	- Pointer to ctx (xdp_md) in XDP program
241d7e79c97SLorenzo Bianconi  *		    Cannot be NULL
242d7e79c97SLorenzo Bianconi  * @bpf_tuple	- Pointer to memory representing the tuple to look up
243d7e79c97SLorenzo Bianconi  *		    Cannot be NULL
244d7e79c97SLorenzo Bianconi  * @tuple__sz	- Length of the tuple structure
245d7e79c97SLorenzo Bianconi  *		    Must be one of sizeof(bpf_tuple->ipv4) or
246d7e79c97SLorenzo Bianconi  *		    sizeof(bpf_tuple->ipv6)
247d7e79c97SLorenzo Bianconi  * @opts	- Additional options for allocation (documented above)
248d7e79c97SLorenzo Bianconi  *		    Cannot be NULL
249d7e79c97SLorenzo Bianconi  * @opts__sz	- Length of the bpf_ct_opts structure
250d7e79c97SLorenzo Bianconi  *		    Must be NF_BPF_CT_OPTS_SZ (12)
251d7e79c97SLorenzo Bianconi  */
252400031e0SDavid Vernet __bpf_kfunc struct nf_conn___init *
bpf_xdp_ct_alloc(struct xdp_md * xdp_ctx,struct bpf_sock_tuple * bpf_tuple,u32 tuple__sz,struct bpf_ct_opts * opts,u32 opts__sz)253d7e79c97SLorenzo Bianconi bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
254d7e79c97SLorenzo Bianconi 		 u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
255d7e79c97SLorenzo Bianconi {
256d7e79c97SLorenzo Bianconi 	struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
257d7e79c97SLorenzo Bianconi 	struct nf_conn *nfct;
258d7e79c97SLorenzo Bianconi 
259d7e79c97SLorenzo Bianconi 	nfct = __bpf_nf_ct_alloc_entry(dev_net(ctx->rxq->dev), bpf_tuple, tuple__sz,
260d7e79c97SLorenzo Bianconi 				       opts, opts__sz, 10);
261d7e79c97SLorenzo Bianconi 	if (IS_ERR(nfct)) {
262d7e79c97SLorenzo Bianconi 		if (opts)
263d7e79c97SLorenzo Bianconi 			opts->error = PTR_ERR(nfct);
264d7e79c97SLorenzo Bianconi 		return NULL;
265d7e79c97SLorenzo Bianconi 	}
266d7e79c97SLorenzo Bianconi 
267d7e79c97SLorenzo Bianconi 	return (struct nf_conn___init *)nfct;
268d7e79c97SLorenzo Bianconi }
269d7e79c97SLorenzo Bianconi 
270b4c2b959SKumar Kartikeya Dwivedi /* bpf_xdp_ct_lookup - Lookup CT entry for the given tuple, and acquire a
271b4c2b959SKumar Kartikeya Dwivedi  *		       reference to it
272b4c2b959SKumar Kartikeya Dwivedi  *
273b4c2b959SKumar Kartikeya Dwivedi  * Parameters:
274b4c2b959SKumar Kartikeya Dwivedi  * @xdp_ctx	- Pointer to ctx (xdp_md) in XDP program
275b4c2b959SKumar Kartikeya Dwivedi  *		    Cannot be NULL
276b4c2b959SKumar Kartikeya Dwivedi  * @bpf_tuple	- Pointer to memory representing the tuple to look up
277b4c2b959SKumar Kartikeya Dwivedi  *		    Cannot be NULL
278b4c2b959SKumar Kartikeya Dwivedi  * @tuple__sz	- Length of the tuple structure
279b4c2b959SKumar Kartikeya Dwivedi  *		    Must be one of sizeof(bpf_tuple->ipv4) or
280b4c2b959SKumar Kartikeya Dwivedi  *		    sizeof(bpf_tuple->ipv6)
281b4c2b959SKumar Kartikeya Dwivedi  * @opts	- Additional options for lookup (documented above)
282b4c2b959SKumar Kartikeya Dwivedi  *		    Cannot be NULL
283b4c2b959SKumar Kartikeya Dwivedi  * @opts__sz	- Length of the bpf_ct_opts structure
284b4c2b959SKumar Kartikeya Dwivedi  *		    Must be NF_BPF_CT_OPTS_SZ (12)
285b4c2b959SKumar Kartikeya Dwivedi  */
286400031e0SDavid Vernet __bpf_kfunc struct nf_conn *
bpf_xdp_ct_lookup(struct xdp_md * xdp_ctx,struct bpf_sock_tuple * bpf_tuple,u32 tuple__sz,struct bpf_ct_opts * opts,u32 opts__sz)287b4c2b959SKumar Kartikeya Dwivedi bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
288b4c2b959SKumar Kartikeya Dwivedi 		  u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
289b4c2b959SKumar Kartikeya Dwivedi {
290b4c2b959SKumar Kartikeya Dwivedi 	struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
291b4c2b959SKumar Kartikeya Dwivedi 	struct net *caller_net;
292b4c2b959SKumar Kartikeya Dwivedi 	struct nf_conn *nfct;
293b4c2b959SKumar Kartikeya Dwivedi 
294b4c2b959SKumar Kartikeya Dwivedi 	caller_net = dev_net(ctx->rxq->dev);
295aed8ee7fSKumar Kartikeya Dwivedi 	nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts, opts__sz);
296b4c2b959SKumar Kartikeya Dwivedi 	if (IS_ERR(nfct)) {
297aed8ee7fSKumar Kartikeya Dwivedi 		if (opts)
298b4c2b959SKumar Kartikeya Dwivedi 			opts->error = PTR_ERR(nfct);
299b4c2b959SKumar Kartikeya Dwivedi 		return NULL;
300b4c2b959SKumar Kartikeya Dwivedi 	}
301b4c2b959SKumar Kartikeya Dwivedi 	return nfct;
302b4c2b959SKumar Kartikeya Dwivedi }
303b4c2b959SKumar Kartikeya Dwivedi 
304d7e79c97SLorenzo Bianconi /* bpf_skb_ct_alloc - Allocate a new CT entry
305d7e79c97SLorenzo Bianconi  *
306d7e79c97SLorenzo Bianconi  * Parameters:
307d7e79c97SLorenzo Bianconi  * @skb_ctx	- Pointer to ctx (__sk_buff) in TC program
308d7e79c97SLorenzo Bianconi  *		    Cannot be NULL
309d7e79c97SLorenzo Bianconi  * @bpf_tuple	- Pointer to memory representing the tuple to look up
310d7e79c97SLorenzo Bianconi  *		    Cannot be NULL
311d7e79c97SLorenzo Bianconi  * @tuple__sz	- Length of the tuple structure
312d7e79c97SLorenzo Bianconi  *		    Must be one of sizeof(bpf_tuple->ipv4) or
313d7e79c97SLorenzo Bianconi  *		    sizeof(bpf_tuple->ipv6)
314d7e79c97SLorenzo Bianconi  * @opts	- Additional options for allocation (documented above)
315d7e79c97SLorenzo Bianconi  *		    Cannot be NULL
316d7e79c97SLorenzo Bianconi  * @opts__sz	- Length of the bpf_ct_opts structure
317d7e79c97SLorenzo Bianconi  *		    Must be NF_BPF_CT_OPTS_SZ (12)
318d7e79c97SLorenzo Bianconi  */
319400031e0SDavid Vernet __bpf_kfunc struct nf_conn___init *
bpf_skb_ct_alloc(struct __sk_buff * skb_ctx,struct bpf_sock_tuple * bpf_tuple,u32 tuple__sz,struct bpf_ct_opts * opts,u32 opts__sz)320d7e79c97SLorenzo Bianconi bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
321d7e79c97SLorenzo Bianconi 		 u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
322d7e79c97SLorenzo Bianconi {
323d7e79c97SLorenzo Bianconi 	struct sk_buff *skb = (struct sk_buff *)skb_ctx;
324d7e79c97SLorenzo Bianconi 	struct nf_conn *nfct;
325d7e79c97SLorenzo Bianconi 	struct net *net;
326d7e79c97SLorenzo Bianconi 
327d7e79c97SLorenzo Bianconi 	net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
328d7e79c97SLorenzo Bianconi 	nfct = __bpf_nf_ct_alloc_entry(net, bpf_tuple, tuple__sz, opts, opts__sz, 10);
329d7e79c97SLorenzo Bianconi 	if (IS_ERR(nfct)) {
330d7e79c97SLorenzo Bianconi 		if (opts)
331d7e79c97SLorenzo Bianconi 			opts->error = PTR_ERR(nfct);
332d7e79c97SLorenzo Bianconi 		return NULL;
333d7e79c97SLorenzo Bianconi 	}
334d7e79c97SLorenzo Bianconi 
335d7e79c97SLorenzo Bianconi 	return (struct nf_conn___init *)nfct;
336d7e79c97SLorenzo Bianconi }
337d7e79c97SLorenzo Bianconi 
338b4c2b959SKumar Kartikeya Dwivedi /* bpf_skb_ct_lookup - Lookup CT entry for the given tuple, and acquire a
339b4c2b959SKumar Kartikeya Dwivedi  *		       reference to it
340b4c2b959SKumar Kartikeya Dwivedi  *
341b4c2b959SKumar Kartikeya Dwivedi  * Parameters:
342b4c2b959SKumar Kartikeya Dwivedi  * @skb_ctx	- Pointer to ctx (__sk_buff) in TC program
343b4c2b959SKumar Kartikeya Dwivedi  *		    Cannot be NULL
344b4c2b959SKumar Kartikeya Dwivedi  * @bpf_tuple	- Pointer to memory representing the tuple to look up
345b4c2b959SKumar Kartikeya Dwivedi  *		    Cannot be NULL
346b4c2b959SKumar Kartikeya Dwivedi  * @tuple__sz	- Length of the tuple structure
347b4c2b959SKumar Kartikeya Dwivedi  *		    Must be one of sizeof(bpf_tuple->ipv4) or
348b4c2b959SKumar Kartikeya Dwivedi  *		    sizeof(bpf_tuple->ipv6)
349b4c2b959SKumar Kartikeya Dwivedi  * @opts	- Additional options for lookup (documented above)
350b4c2b959SKumar Kartikeya Dwivedi  *		    Cannot be NULL
351b4c2b959SKumar Kartikeya Dwivedi  * @opts__sz	- Length of the bpf_ct_opts structure
352b4c2b959SKumar Kartikeya Dwivedi  *		    Must be NF_BPF_CT_OPTS_SZ (12)
353b4c2b959SKumar Kartikeya Dwivedi  */
354400031e0SDavid Vernet __bpf_kfunc struct nf_conn *
bpf_skb_ct_lookup(struct __sk_buff * skb_ctx,struct bpf_sock_tuple * bpf_tuple,u32 tuple__sz,struct bpf_ct_opts * opts,u32 opts__sz)355b4c2b959SKumar Kartikeya Dwivedi bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
356b4c2b959SKumar Kartikeya Dwivedi 		  u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
357b4c2b959SKumar Kartikeya Dwivedi {
358b4c2b959SKumar Kartikeya Dwivedi 	struct sk_buff *skb = (struct sk_buff *)skb_ctx;
359b4c2b959SKumar Kartikeya Dwivedi 	struct net *caller_net;
360b4c2b959SKumar Kartikeya Dwivedi 	struct nf_conn *nfct;
361b4c2b959SKumar Kartikeya Dwivedi 
362b4c2b959SKumar Kartikeya Dwivedi 	caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
363aed8ee7fSKumar Kartikeya Dwivedi 	nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts, opts__sz);
364b4c2b959SKumar Kartikeya Dwivedi 	if (IS_ERR(nfct)) {
365aed8ee7fSKumar Kartikeya Dwivedi 		if (opts)
366b4c2b959SKumar Kartikeya Dwivedi 			opts->error = PTR_ERR(nfct);
367b4c2b959SKumar Kartikeya Dwivedi 		return NULL;
368b4c2b959SKumar Kartikeya Dwivedi 	}
369b4c2b959SKumar Kartikeya Dwivedi 	return nfct;
370b4c2b959SKumar Kartikeya Dwivedi }
371b4c2b959SKumar Kartikeya Dwivedi 
372d7e79c97SLorenzo Bianconi /* bpf_ct_insert_entry - Add the provided entry into a CT map
373d7e79c97SLorenzo Bianconi  *
374d7e79c97SLorenzo Bianconi  * This must be invoked for referenced PTR_TO_BTF_ID.
375d7e79c97SLorenzo Bianconi  *
3760b389236SKumar Kartikeya Dwivedi  * @nfct	 - Pointer to referenced nf_conn___init object, obtained
377d7e79c97SLorenzo Bianconi  *		   using bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
378d7e79c97SLorenzo Bianconi  */
bpf_ct_insert_entry(struct nf_conn___init * nfct_i)379400031e0SDavid Vernet __bpf_kfunc struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i)
380d7e79c97SLorenzo Bianconi {
3810b389236SKumar Kartikeya Dwivedi 	struct nf_conn *nfct = (struct nf_conn *)nfct_i;
382d7e79c97SLorenzo Bianconi 	int err;
383d7e79c97SLorenzo Bianconi 
384*837723b2SIlya Leoshkevich 	if (!nf_ct_is_confirmed(nfct))
385*837723b2SIlya Leoshkevich 		nfct->timeout += nfct_time_stamp;
3862cdaa3eeSPablo Neira Ayuso 	nfct->status |= IPS_CONFIRMED;
387d7e79c97SLorenzo Bianconi 	err = nf_conntrack_hash_check_insert(nfct);
388d7e79c97SLorenzo Bianconi 	if (err < 0) {
389d7e79c97SLorenzo Bianconi 		nf_conntrack_free(nfct);
390d7e79c97SLorenzo Bianconi 		return NULL;
391d7e79c97SLorenzo Bianconi 	}
392d7e79c97SLorenzo Bianconi 	return nfct;
393d7e79c97SLorenzo Bianconi }
394d7e79c97SLorenzo Bianconi 
395b4c2b959SKumar Kartikeya Dwivedi /* bpf_ct_release - Release acquired nf_conn object
396b4c2b959SKumar Kartikeya Dwivedi  *
397b4c2b959SKumar Kartikeya Dwivedi  * This must be invoked for referenced PTR_TO_BTF_ID, and the verifier rejects
398b4c2b959SKumar Kartikeya Dwivedi  * the program if any references remain in the program in all of the explored
399b4c2b959SKumar Kartikeya Dwivedi  * states.
400b4c2b959SKumar Kartikeya Dwivedi  *
401b4c2b959SKumar Kartikeya Dwivedi  * Parameters:
402b4c2b959SKumar Kartikeya Dwivedi  * @nf_conn	 - Pointer to referenced nf_conn object, obtained using
403b4c2b959SKumar Kartikeya Dwivedi  *		   bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
404b4c2b959SKumar Kartikeya Dwivedi  */
bpf_ct_release(struct nf_conn * nfct)405400031e0SDavid Vernet __bpf_kfunc void bpf_ct_release(struct nf_conn *nfct)
406b4c2b959SKumar Kartikeya Dwivedi {
407b4c2b959SKumar Kartikeya Dwivedi 	nf_ct_put(nfct);
408b4c2b959SKumar Kartikeya Dwivedi }
409b4c2b959SKumar Kartikeya Dwivedi 
4100b389236SKumar Kartikeya Dwivedi /* bpf_ct_set_timeout - Set timeout of allocated nf_conn
4110b389236SKumar Kartikeya Dwivedi  *
4120b389236SKumar Kartikeya Dwivedi  * Sets the default timeout of newly allocated nf_conn before insertion.
4130b389236SKumar Kartikeya Dwivedi  * This helper must be invoked for refcounted pointer to nf_conn___init.
4140b389236SKumar Kartikeya Dwivedi  *
4150b389236SKumar Kartikeya Dwivedi  * Parameters:
4160b389236SKumar Kartikeya Dwivedi  * @nfct	 - Pointer to referenced nf_conn object, obtained using
4170b389236SKumar Kartikeya Dwivedi  *                 bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
4180b389236SKumar Kartikeya Dwivedi  * @timeout      - Timeout in msecs.
4190b389236SKumar Kartikeya Dwivedi  */
bpf_ct_set_timeout(struct nf_conn___init * nfct,u32 timeout)420400031e0SDavid Vernet __bpf_kfunc void bpf_ct_set_timeout(struct nf_conn___init *nfct, u32 timeout)
4210b389236SKumar Kartikeya Dwivedi {
4220b389236SKumar Kartikeya Dwivedi 	__nf_ct_set_timeout((struct nf_conn *)nfct, msecs_to_jiffies(timeout));
4230b389236SKumar Kartikeya Dwivedi }
4240b389236SKumar Kartikeya Dwivedi 
4250b389236SKumar Kartikeya Dwivedi /* bpf_ct_change_timeout - Change timeout of inserted nf_conn
4260b389236SKumar Kartikeya Dwivedi  *
4270b389236SKumar Kartikeya Dwivedi  * Change timeout associated of the inserted or looked up nf_conn.
4280b389236SKumar Kartikeya Dwivedi  * This helper must be invoked for refcounted pointer to nf_conn.
4290b389236SKumar Kartikeya Dwivedi  *
4300b389236SKumar Kartikeya Dwivedi  * Parameters:
4310b389236SKumar Kartikeya Dwivedi  * @nfct	 - Pointer to referenced nf_conn object, obtained using
4320b389236SKumar Kartikeya Dwivedi  *		   bpf_ct_insert_entry, bpf_xdp_ct_lookup, or bpf_skb_ct_lookup.
4330b389236SKumar Kartikeya Dwivedi  * @timeout      - New timeout in msecs.
4340b389236SKumar Kartikeya Dwivedi  */
bpf_ct_change_timeout(struct nf_conn * nfct,u32 timeout)435400031e0SDavid Vernet __bpf_kfunc int bpf_ct_change_timeout(struct nf_conn *nfct, u32 timeout)
4360b389236SKumar Kartikeya Dwivedi {
4370b389236SKumar Kartikeya Dwivedi 	return __nf_ct_change_timeout(nfct, msecs_to_jiffies(timeout));
4380b389236SKumar Kartikeya Dwivedi }
4390b389236SKumar Kartikeya Dwivedi 
440ef69aa3aSLorenzo Bianconi /* bpf_ct_set_status - Set status field of allocated nf_conn
441ef69aa3aSLorenzo Bianconi  *
442ef69aa3aSLorenzo Bianconi  * Set the status field of the newly allocated nf_conn before insertion.
443ef69aa3aSLorenzo Bianconi  * This must be invoked for referenced PTR_TO_BTF_ID to nf_conn___init.
444ef69aa3aSLorenzo Bianconi  *
445ef69aa3aSLorenzo Bianconi  * Parameters:
446ef69aa3aSLorenzo Bianconi  * @nfct	 - Pointer to referenced nf_conn object, obtained using
447ef69aa3aSLorenzo Bianconi  *		   bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
448ef69aa3aSLorenzo Bianconi  * @status       - New status value.
449ef69aa3aSLorenzo Bianconi  */
bpf_ct_set_status(const struct nf_conn___init * nfct,u32 status)450400031e0SDavid Vernet __bpf_kfunc int bpf_ct_set_status(const struct nf_conn___init *nfct, u32 status)
451ef69aa3aSLorenzo Bianconi {
452ef69aa3aSLorenzo Bianconi 	return nf_ct_change_status_common((struct nf_conn *)nfct, status);
453ef69aa3aSLorenzo Bianconi }
454ef69aa3aSLorenzo Bianconi 
455ef69aa3aSLorenzo Bianconi /* bpf_ct_change_status - Change status of inserted nf_conn
456ef69aa3aSLorenzo Bianconi  *
457ef69aa3aSLorenzo Bianconi  * Change the status field of the provided connection tracking entry.
458ef69aa3aSLorenzo Bianconi  * This must be invoked for referenced PTR_TO_BTF_ID to nf_conn.
459ef69aa3aSLorenzo Bianconi  *
460ef69aa3aSLorenzo Bianconi  * Parameters:
461ef69aa3aSLorenzo Bianconi  * @nfct	 - Pointer to referenced nf_conn object, obtained using
462ef69aa3aSLorenzo Bianconi  *		   bpf_ct_insert_entry, bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
463ef69aa3aSLorenzo Bianconi  * @status       - New status value.
464ef69aa3aSLorenzo Bianconi  */
bpf_ct_change_status(struct nf_conn * nfct,u32 status)465400031e0SDavid Vernet __bpf_kfunc int bpf_ct_change_status(struct nf_conn *nfct, u32 status)
466ef69aa3aSLorenzo Bianconi {
467ef69aa3aSLorenzo Bianconi 	return nf_ct_change_status_common(nfct, status);
468ef69aa3aSLorenzo Bianconi }
469ef69aa3aSLorenzo Bianconi 
470b4c2b959SKumar Kartikeya Dwivedi __diag_pop()
471b4c2b959SKumar Kartikeya Dwivedi 
472a4703e31SKumar Kartikeya Dwivedi BTF_SET8_START(nf_ct_kfunc_set)
473d7e79c97SLorenzo Bianconi BTF_ID_FLAGS(func, bpf_xdp_ct_alloc, KF_ACQUIRE | KF_RET_NULL)
474a4703e31SKumar Kartikeya Dwivedi BTF_ID_FLAGS(func, bpf_xdp_ct_lookup, KF_ACQUIRE | KF_RET_NULL)
475d7e79c97SLorenzo Bianconi BTF_ID_FLAGS(func, bpf_skb_ct_alloc, KF_ACQUIRE | KF_RET_NULL)
476a4703e31SKumar Kartikeya Dwivedi BTF_ID_FLAGS(func, bpf_skb_ct_lookup, KF_ACQUIRE | KF_RET_NULL)
477d7e79c97SLorenzo Bianconi BTF_ID_FLAGS(func, bpf_ct_insert_entry, KF_ACQUIRE | KF_RET_NULL | KF_RELEASE)
478a4703e31SKumar Kartikeya Dwivedi BTF_ID_FLAGS(func, bpf_ct_release, KF_RELEASE)
4790b389236SKumar Kartikeya Dwivedi BTF_ID_FLAGS(func, bpf_ct_set_timeout, KF_TRUSTED_ARGS)
4800b389236SKumar Kartikeya Dwivedi BTF_ID_FLAGS(func, bpf_ct_change_timeout, KF_TRUSTED_ARGS)
481ef69aa3aSLorenzo Bianconi BTF_ID_FLAGS(func, bpf_ct_set_status, KF_TRUSTED_ARGS)
482ef69aa3aSLorenzo Bianconi BTF_ID_FLAGS(func, bpf_ct_change_status, KF_TRUSTED_ARGS)
483a4703e31SKumar Kartikeya Dwivedi BTF_SET8_END(nf_ct_kfunc_set)
484b4c2b959SKumar Kartikeya Dwivedi 
485a4703e31SKumar Kartikeya Dwivedi static const struct btf_kfunc_id_set nf_conntrack_kfunc_set = {
486b4c2b959SKumar Kartikeya Dwivedi 	.owner = THIS_MODULE,
487a4703e31SKumar Kartikeya Dwivedi 	.set   = &nf_ct_kfunc_set,
488b4c2b959SKumar Kartikeya Dwivedi };
489b4c2b959SKumar Kartikeya Dwivedi 
register_nf_conntrack_bpf(void)490b4c2b959SKumar Kartikeya Dwivedi int register_nf_conntrack_bpf(void)
491b4c2b959SKumar Kartikeya Dwivedi {
492b4c2b959SKumar Kartikeya Dwivedi 	int ret;
493b4c2b959SKumar Kartikeya Dwivedi 
494a4703e31SKumar Kartikeya Dwivedi 	ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_kfunc_set);
495864b656fSDaniel Xu 	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_kfunc_set);
496864b656fSDaniel Xu 	if (!ret) {
497864b656fSDaniel Xu 		mutex_lock(&nf_conn_btf_access_lock);
4985a090aa3SDaniel Xu 		nfct_btf_struct_access = _nf_conntrack_btf_struct_access;
499864b656fSDaniel Xu 		mutex_unlock(&nf_conn_btf_access_lock);
500864b656fSDaniel Xu 	}
501864b656fSDaniel Xu 
502864b656fSDaniel Xu 	return ret;
503864b656fSDaniel Xu }
504864b656fSDaniel Xu 
cleanup_nf_conntrack_bpf(void)505864b656fSDaniel Xu void cleanup_nf_conntrack_bpf(void)
506864b656fSDaniel Xu {
507864b656fSDaniel Xu 	mutex_lock(&nf_conn_btf_access_lock);
5085a090aa3SDaniel Xu 	nfct_btf_struct_access = NULL;
509864b656fSDaniel Xu 	mutex_unlock(&nf_conn_btf_access_lock);
510b4c2b959SKumar Kartikeya Dwivedi }
511