1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Unstable Conntrack Helpers for XDP and TC-BPF hook 3 * 4 * These are called from the XDP and SCHED_CLS BPF programs. Note that it is 5 * allowed to break compatibility for these functions since the interface they 6 * are exposed through to BPF programs is explicitly unstable. 7 */ 8 9 #include <linux/bpf_verifier.h> 10 #include <linux/bpf.h> 11 #include <linux/btf.h> 12 #include <linux/filter.h> 13 #include <linux/mutex.h> 14 #include <linux/types.h> 15 #include <linux/btf_ids.h> 16 #include <linux/net_namespace.h> 17 #include <net/netfilter/nf_conntrack_bpf.h> 18 #include <net/netfilter/nf_conntrack_core.h> 19 20 /* bpf_ct_opts - Options for CT lookup helpers 21 * 22 * Members: 23 * @netns_id - Specify the network namespace for lookup 24 * Values: 25 * BPF_F_CURRENT_NETNS (-1) 26 * Use namespace associated with ctx (xdp_md, __sk_buff) 27 * [0, S32_MAX] 28 * Network Namespace ID 29 * @error - Out parameter, set for any errors encountered 30 * Values: 31 * -EINVAL - Passed NULL for bpf_tuple pointer 32 * -EINVAL - opts->reserved is not 0 33 * -EINVAL - netns_id is less than -1 34 * -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12) 35 * -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP 36 * -ENONET - No network namespace found for netns_id 37 * -ENOENT - Conntrack lookup could not find entry for tuple 38 * -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4) 39 * or sizeof(tuple->ipv6) 40 * @l4proto - Layer 4 protocol 41 * Values: 42 * IPPROTO_TCP, IPPROTO_UDP 43 * @dir: - connection tracking tuple direction. 44 * @reserved - Reserved member, will be reused for more options in future 45 * Values: 46 * 0 47 */ 48 struct bpf_ct_opts { 49 s32 netns_id; 50 s32 error; 51 u8 l4proto; 52 u8 dir; 53 u8 reserved[2]; 54 }; 55 56 enum { 57 NF_BPF_CT_OPTS_SZ = 12, 58 }; 59 60 static int bpf_nf_ct_tuple_parse(struct bpf_sock_tuple *bpf_tuple, 61 u32 tuple_len, u8 protonum, u8 dir, 62 struct nf_conntrack_tuple *tuple) 63 { 64 union nf_inet_addr *src = dir ? &tuple->dst.u3 : &tuple->src.u3; 65 union nf_inet_addr *dst = dir ? &tuple->src.u3 : &tuple->dst.u3; 66 union nf_conntrack_man_proto *sport = dir ? (void *)&tuple->dst.u 67 : &tuple->src.u; 68 union nf_conntrack_man_proto *dport = dir ? &tuple->src.u 69 : (void *)&tuple->dst.u; 70 71 if (unlikely(protonum != IPPROTO_TCP && protonum != IPPROTO_UDP)) 72 return -EPROTO; 73 74 memset(tuple, 0, sizeof(*tuple)); 75 76 switch (tuple_len) { 77 case sizeof(bpf_tuple->ipv4): 78 tuple->src.l3num = AF_INET; 79 src->ip = bpf_tuple->ipv4.saddr; 80 sport->tcp.port = bpf_tuple->ipv4.sport; 81 dst->ip = bpf_tuple->ipv4.daddr; 82 dport->tcp.port = bpf_tuple->ipv4.dport; 83 break; 84 case sizeof(bpf_tuple->ipv6): 85 tuple->src.l3num = AF_INET6; 86 memcpy(src->ip6, bpf_tuple->ipv6.saddr, sizeof(bpf_tuple->ipv6.saddr)); 87 sport->tcp.port = bpf_tuple->ipv6.sport; 88 memcpy(dst->ip6, bpf_tuple->ipv6.daddr, sizeof(bpf_tuple->ipv6.daddr)); 89 dport->tcp.port = bpf_tuple->ipv6.dport; 90 break; 91 default: 92 return -EAFNOSUPPORT; 93 } 94 tuple->dst.protonum = protonum; 95 tuple->dst.dir = dir; 96 97 return 0; 98 } 99 100 static struct nf_conn * 101 __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple, 102 u32 tuple_len, struct bpf_ct_opts *opts, u32 opts_len, 103 u32 timeout) 104 { 105 struct nf_conntrack_tuple otuple, rtuple; 106 struct nf_conn *ct; 107 int err; 108 109 if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] || 110 opts_len != NF_BPF_CT_OPTS_SZ) 111 return ERR_PTR(-EINVAL); 112 113 if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS)) 114 return ERR_PTR(-EINVAL); 115 116 err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto, 117 IP_CT_DIR_ORIGINAL, &otuple); 118 if (err < 0) 119 return ERR_PTR(err); 120 121 err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto, 122 IP_CT_DIR_REPLY, &rtuple); 123 if (err < 0) 124 return ERR_PTR(err); 125 126 if (opts->netns_id >= 0) { 127 net = get_net_ns_by_id(net, opts->netns_id); 128 if (unlikely(!net)) 129 return ERR_PTR(-ENONET); 130 } 131 132 ct = nf_conntrack_alloc(net, &nf_ct_zone_dflt, &otuple, &rtuple, 133 GFP_ATOMIC); 134 if (IS_ERR(ct)) 135 goto out; 136 137 memset(&ct->proto, 0, sizeof(ct->proto)); 138 __nf_ct_set_timeout(ct, timeout * HZ); 139 140 out: 141 if (opts->netns_id >= 0) 142 put_net(net); 143 144 return ct; 145 } 146 147 static struct nf_conn *__bpf_nf_ct_lookup(struct net *net, 148 struct bpf_sock_tuple *bpf_tuple, 149 u32 tuple_len, struct bpf_ct_opts *opts, 150 u32 opts_len) 151 { 152 struct nf_conntrack_tuple_hash *hash; 153 struct nf_conntrack_tuple tuple; 154 struct nf_conn *ct; 155 int err; 156 157 if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] || 158 opts_len != NF_BPF_CT_OPTS_SZ) 159 return ERR_PTR(-EINVAL); 160 if (unlikely(opts->l4proto != IPPROTO_TCP && opts->l4proto != IPPROTO_UDP)) 161 return ERR_PTR(-EPROTO); 162 if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS)) 163 return ERR_PTR(-EINVAL); 164 165 err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto, 166 IP_CT_DIR_ORIGINAL, &tuple); 167 if (err < 0) 168 return ERR_PTR(err); 169 170 if (opts->netns_id >= 0) { 171 net = get_net_ns_by_id(net, opts->netns_id); 172 if (unlikely(!net)) 173 return ERR_PTR(-ENONET); 174 } 175 176 hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple); 177 if (opts->netns_id >= 0) 178 put_net(net); 179 if (!hash) 180 return ERR_PTR(-ENOENT); 181 182 ct = nf_ct_tuplehash_to_ctrack(hash); 183 opts->dir = NF_CT_DIRECTION(hash); 184 185 return ct; 186 } 187 188 BTF_ID_LIST(btf_nf_conn_ids) 189 BTF_ID(struct, nf_conn) 190 BTF_ID(struct, nf_conn___init) 191 192 /* Check writes into `struct nf_conn` */ 193 static int _nf_conntrack_btf_struct_access(struct bpf_verifier_log *log, 194 const struct bpf_reg_state *reg, 195 int off, int size) 196 { 197 const struct btf_type *ncit, *nct, *t; 198 size_t end; 199 200 ncit = btf_type_by_id(reg->btf, btf_nf_conn_ids[1]); 201 nct = btf_type_by_id(reg->btf, btf_nf_conn_ids[0]); 202 t = btf_type_by_id(reg->btf, reg->btf_id); 203 if (t != nct && t != ncit) { 204 bpf_log(log, "only read is supported\n"); 205 return -EACCES; 206 } 207 208 /* `struct nf_conn` and `struct nf_conn___init` have the same layout 209 * so we are safe to simply merge offset checks here 210 */ 211 switch (off) { 212 #if defined(CONFIG_NF_CONNTRACK_MARK) 213 case offsetof(struct nf_conn, mark): 214 end = offsetofend(struct nf_conn, mark); 215 break; 216 #endif 217 default: 218 bpf_log(log, "no write support to nf_conn at off %d\n", off); 219 return -EACCES; 220 } 221 222 if (off + size > end) { 223 bpf_log(log, 224 "write access at off %d with size %d beyond the member of nf_conn ended at %zu\n", 225 off, size, end); 226 return -EACCES; 227 } 228 229 return 0; 230 } 231 232 __diag_push(); 233 __diag_ignore_all("-Wmissing-prototypes", 234 "Global functions as their definitions will be in nf_conntrack BTF"); 235 236 /* bpf_xdp_ct_alloc - Allocate a new CT entry 237 * 238 * Parameters: 239 * @xdp_ctx - Pointer to ctx (xdp_md) in XDP program 240 * Cannot be NULL 241 * @bpf_tuple - Pointer to memory representing the tuple to look up 242 * Cannot be NULL 243 * @tuple__sz - Length of the tuple structure 244 * Must be one of sizeof(bpf_tuple->ipv4) or 245 * sizeof(bpf_tuple->ipv6) 246 * @opts - Additional options for allocation (documented above) 247 * Cannot be NULL 248 * @opts__sz - Length of the bpf_ct_opts structure 249 * Must be NF_BPF_CT_OPTS_SZ (12) 250 */ 251 __bpf_kfunc struct nf_conn___init * 252 bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple, 253 u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz) 254 { 255 struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx; 256 struct nf_conn *nfct; 257 258 nfct = __bpf_nf_ct_alloc_entry(dev_net(ctx->rxq->dev), bpf_tuple, tuple__sz, 259 opts, opts__sz, 10); 260 if (IS_ERR(nfct)) { 261 if (opts) 262 opts->error = PTR_ERR(nfct); 263 return NULL; 264 } 265 266 return (struct nf_conn___init *)nfct; 267 } 268 269 /* bpf_xdp_ct_lookup - Lookup CT entry for the given tuple, and acquire a 270 * reference to it 271 * 272 * Parameters: 273 * @xdp_ctx - Pointer to ctx (xdp_md) in XDP program 274 * Cannot be NULL 275 * @bpf_tuple - Pointer to memory representing the tuple to look up 276 * Cannot be NULL 277 * @tuple__sz - Length of the tuple structure 278 * Must be one of sizeof(bpf_tuple->ipv4) or 279 * sizeof(bpf_tuple->ipv6) 280 * @opts - Additional options for lookup (documented above) 281 * Cannot be NULL 282 * @opts__sz - Length of the bpf_ct_opts structure 283 * Must be NF_BPF_CT_OPTS_SZ (12) 284 */ 285 __bpf_kfunc struct nf_conn * 286 bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple, 287 u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz) 288 { 289 struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx; 290 struct net *caller_net; 291 struct nf_conn *nfct; 292 293 caller_net = dev_net(ctx->rxq->dev); 294 nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts, opts__sz); 295 if (IS_ERR(nfct)) { 296 if (opts) 297 opts->error = PTR_ERR(nfct); 298 return NULL; 299 } 300 return nfct; 301 } 302 303 /* bpf_skb_ct_alloc - Allocate a new CT entry 304 * 305 * Parameters: 306 * @skb_ctx - Pointer to ctx (__sk_buff) in TC program 307 * Cannot be NULL 308 * @bpf_tuple - Pointer to memory representing the tuple to look up 309 * Cannot be NULL 310 * @tuple__sz - Length of the tuple structure 311 * Must be one of sizeof(bpf_tuple->ipv4) or 312 * sizeof(bpf_tuple->ipv6) 313 * @opts - Additional options for allocation (documented above) 314 * Cannot be NULL 315 * @opts__sz - Length of the bpf_ct_opts structure 316 * Must be NF_BPF_CT_OPTS_SZ (12) 317 */ 318 __bpf_kfunc struct nf_conn___init * 319 bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple, 320 u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz) 321 { 322 struct sk_buff *skb = (struct sk_buff *)skb_ctx; 323 struct nf_conn *nfct; 324 struct net *net; 325 326 net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk); 327 nfct = __bpf_nf_ct_alloc_entry(net, bpf_tuple, tuple__sz, opts, opts__sz, 10); 328 if (IS_ERR(nfct)) { 329 if (opts) 330 opts->error = PTR_ERR(nfct); 331 return NULL; 332 } 333 334 return (struct nf_conn___init *)nfct; 335 } 336 337 /* bpf_skb_ct_lookup - Lookup CT entry for the given tuple, and acquire a 338 * reference to it 339 * 340 * Parameters: 341 * @skb_ctx - Pointer to ctx (__sk_buff) in TC program 342 * Cannot be NULL 343 * @bpf_tuple - Pointer to memory representing the tuple to look up 344 * Cannot be NULL 345 * @tuple__sz - Length of the tuple structure 346 * Must be one of sizeof(bpf_tuple->ipv4) or 347 * sizeof(bpf_tuple->ipv6) 348 * @opts - Additional options for lookup (documented above) 349 * Cannot be NULL 350 * @opts__sz - Length of the bpf_ct_opts structure 351 * Must be NF_BPF_CT_OPTS_SZ (12) 352 */ 353 __bpf_kfunc struct nf_conn * 354 bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple, 355 u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz) 356 { 357 struct sk_buff *skb = (struct sk_buff *)skb_ctx; 358 struct net *caller_net; 359 struct nf_conn *nfct; 360 361 caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk); 362 nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts, opts__sz); 363 if (IS_ERR(nfct)) { 364 if (opts) 365 opts->error = PTR_ERR(nfct); 366 return NULL; 367 } 368 return nfct; 369 } 370 371 /* bpf_ct_insert_entry - Add the provided entry into a CT map 372 * 373 * This must be invoked for referenced PTR_TO_BTF_ID. 374 * 375 * @nfct - Pointer to referenced nf_conn___init object, obtained 376 * using bpf_xdp_ct_alloc or bpf_skb_ct_alloc. 377 */ 378 __bpf_kfunc struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i) 379 { 380 struct nf_conn *nfct = (struct nf_conn *)nfct_i; 381 int err; 382 383 nfct->status |= IPS_CONFIRMED; 384 err = nf_conntrack_hash_check_insert(nfct); 385 if (err < 0) { 386 nf_conntrack_free(nfct); 387 return NULL; 388 } 389 return nfct; 390 } 391 392 /* bpf_ct_release - Release acquired nf_conn object 393 * 394 * This must be invoked for referenced PTR_TO_BTF_ID, and the verifier rejects 395 * the program if any references remain in the program in all of the explored 396 * states. 397 * 398 * Parameters: 399 * @nf_conn - Pointer to referenced nf_conn object, obtained using 400 * bpf_xdp_ct_lookup or bpf_skb_ct_lookup. 401 */ 402 __bpf_kfunc void bpf_ct_release(struct nf_conn *nfct) 403 { 404 nf_ct_put(nfct); 405 } 406 407 /* bpf_ct_set_timeout - Set timeout of allocated nf_conn 408 * 409 * Sets the default timeout of newly allocated nf_conn before insertion. 410 * This helper must be invoked for refcounted pointer to nf_conn___init. 411 * 412 * Parameters: 413 * @nfct - Pointer to referenced nf_conn object, obtained using 414 * bpf_xdp_ct_alloc or bpf_skb_ct_alloc. 415 * @timeout - Timeout in msecs. 416 */ 417 __bpf_kfunc void bpf_ct_set_timeout(struct nf_conn___init *nfct, u32 timeout) 418 { 419 __nf_ct_set_timeout((struct nf_conn *)nfct, msecs_to_jiffies(timeout)); 420 } 421 422 /* bpf_ct_change_timeout - Change timeout of inserted nf_conn 423 * 424 * Change timeout associated of the inserted or looked up nf_conn. 425 * This helper must be invoked for refcounted pointer to nf_conn. 426 * 427 * Parameters: 428 * @nfct - Pointer to referenced nf_conn object, obtained using 429 * bpf_ct_insert_entry, bpf_xdp_ct_lookup, or bpf_skb_ct_lookup. 430 * @timeout - New timeout in msecs. 431 */ 432 __bpf_kfunc int bpf_ct_change_timeout(struct nf_conn *nfct, u32 timeout) 433 { 434 return __nf_ct_change_timeout(nfct, msecs_to_jiffies(timeout)); 435 } 436 437 /* bpf_ct_set_status - Set status field of allocated nf_conn 438 * 439 * Set the status field of the newly allocated nf_conn before insertion. 440 * This must be invoked for referenced PTR_TO_BTF_ID to nf_conn___init. 441 * 442 * Parameters: 443 * @nfct - Pointer to referenced nf_conn object, obtained using 444 * bpf_xdp_ct_alloc or bpf_skb_ct_alloc. 445 * @status - New status value. 446 */ 447 __bpf_kfunc int bpf_ct_set_status(const struct nf_conn___init *nfct, u32 status) 448 { 449 return nf_ct_change_status_common((struct nf_conn *)nfct, status); 450 } 451 452 /* bpf_ct_change_status - Change status of inserted nf_conn 453 * 454 * Change the status field of the provided connection tracking entry. 455 * This must be invoked for referenced PTR_TO_BTF_ID to nf_conn. 456 * 457 * Parameters: 458 * @nfct - Pointer to referenced nf_conn object, obtained using 459 * bpf_ct_insert_entry, bpf_xdp_ct_lookup or bpf_skb_ct_lookup. 460 * @status - New status value. 461 */ 462 __bpf_kfunc int bpf_ct_change_status(struct nf_conn *nfct, u32 status) 463 { 464 return nf_ct_change_status_common(nfct, status); 465 } 466 467 __diag_pop() 468 469 BTF_SET8_START(nf_ct_kfunc_set) 470 BTF_ID_FLAGS(func, bpf_xdp_ct_alloc, KF_ACQUIRE | KF_RET_NULL) 471 BTF_ID_FLAGS(func, bpf_xdp_ct_lookup, KF_ACQUIRE | KF_RET_NULL) 472 BTF_ID_FLAGS(func, bpf_skb_ct_alloc, KF_ACQUIRE | KF_RET_NULL) 473 BTF_ID_FLAGS(func, bpf_skb_ct_lookup, KF_ACQUIRE | KF_RET_NULL) 474 BTF_ID_FLAGS(func, bpf_ct_insert_entry, KF_ACQUIRE | KF_RET_NULL | KF_RELEASE) 475 BTF_ID_FLAGS(func, bpf_ct_release, KF_RELEASE) 476 BTF_ID_FLAGS(func, bpf_ct_set_timeout, KF_TRUSTED_ARGS) 477 BTF_ID_FLAGS(func, bpf_ct_change_timeout, KF_TRUSTED_ARGS) 478 BTF_ID_FLAGS(func, bpf_ct_set_status, KF_TRUSTED_ARGS) 479 BTF_ID_FLAGS(func, bpf_ct_change_status, KF_TRUSTED_ARGS) 480 BTF_SET8_END(nf_ct_kfunc_set) 481 482 static const struct btf_kfunc_id_set nf_conntrack_kfunc_set = { 483 .owner = THIS_MODULE, 484 .set = &nf_ct_kfunc_set, 485 }; 486 487 int register_nf_conntrack_bpf(void) 488 { 489 int ret; 490 491 ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_kfunc_set); 492 ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_kfunc_set); 493 if (!ret) { 494 mutex_lock(&nf_conn_btf_access_lock); 495 nfct_btf_struct_access = _nf_conntrack_btf_struct_access; 496 mutex_unlock(&nf_conn_btf_access_lock); 497 } 498 499 return ret; 500 } 501 502 void cleanup_nf_conntrack_bpf(void) 503 { 504 mutex_lock(&nf_conn_btf_access_lock); 505 nfct_btf_struct_access = NULL; 506 mutex_unlock(&nf_conn_btf_access_lock); 507 } 508