1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Functions to manage eBPF programs attached to cgroups 4 * 5 * Copyright (c) 2016 Daniel Mack 6 */ 7 8 #include <linux/kernel.h> 9 #include <linux/atomic.h> 10 #include <linux/cgroup.h> 11 #include <linux/filter.h> 12 #include <linux/slab.h> 13 #include <linux/sysctl.h> 14 #include <linux/string.h> 15 #include <linux/bpf.h> 16 #include <linux/bpf-cgroup.h> 17 #include <linux/bpf_lsm.h> 18 #include <linux/bpf_verifier.h> 19 #include <net/sock.h> 20 #include <net/bpf_sk_storage.h> 21 22 #include "../cgroup/cgroup-internal.h" 23 24 DEFINE_STATIC_KEY_ARRAY_FALSE(cgroup_bpf_enabled_key, MAX_CGROUP_BPF_ATTACH_TYPE); 25 EXPORT_SYMBOL(cgroup_bpf_enabled_key); 26 27 /* 28 * cgroup bpf destruction makes heavy use of work items and there can be a lot 29 * of concurrent destructions. Use a separate workqueue so that cgroup bpf 30 * destruction work items don't end up filling up max_active of system_wq 31 * which may lead to deadlock. 32 */ 33 static struct workqueue_struct *cgroup_bpf_destroy_wq; 34 35 static int __init cgroup_bpf_wq_init(void) 36 { 37 cgroup_bpf_destroy_wq = alloc_workqueue("cgroup_bpf_destroy", 0, 1); 38 if (!cgroup_bpf_destroy_wq) 39 panic("Failed to alloc workqueue for cgroup bpf destroy.\n"); 40 return 0; 41 } 42 core_initcall(cgroup_bpf_wq_init); 43 44 /* __always_inline is necessary to prevent indirect call through run_prog 45 * function pointer. 46 */ 47 static __always_inline int 48 bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp, 49 enum cgroup_bpf_attach_type atype, 50 const void *ctx, bpf_prog_run_fn run_prog, 51 int retval, u32 *ret_flags) 52 { 53 const struct bpf_prog_array_item *item; 54 const struct bpf_prog *prog; 55 const struct bpf_prog_array *array; 56 struct bpf_run_ctx *old_run_ctx; 57 struct bpf_cg_run_ctx run_ctx; 58 u32 func_ret; 59 60 run_ctx.retval = retval; 61 migrate_disable(); 62 rcu_read_lock(); 63 array = rcu_dereference(cgrp->effective[atype]); 64 item = &array->items[0]; 65 old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); 66 while ((prog = READ_ONCE(item->prog))) { 67 run_ctx.prog_item = item; 68 func_ret = run_prog(prog, ctx); 69 if (ret_flags) { 70 *(ret_flags) |= (func_ret >> 1); 71 func_ret &= 1; 72 } 73 if (!func_ret && !IS_ERR_VALUE((long)run_ctx.retval)) 74 run_ctx.retval = -EPERM; 75 item++; 76 } 77 bpf_reset_run_ctx(old_run_ctx); 78 rcu_read_unlock(); 79 migrate_enable(); 80 return run_ctx.retval; 81 } 82 83 unsigned int __cgroup_bpf_run_lsm_sock(const void *ctx, 84 const struct bpf_insn *insn) 85 { 86 const struct bpf_prog *shim_prog; 87 struct sock *sk; 88 struct cgroup *cgrp; 89 int ret = 0; 90 u64 *args; 91 92 args = (u64 *)ctx; 93 sk = (void *)(unsigned long)args[0]; 94 /*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/ 95 shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi)); 96 97 cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); 98 if (likely(cgrp)) 99 ret = bpf_prog_run_array_cg(&cgrp->bpf, 100 shim_prog->aux->cgroup_atype, 101 ctx, bpf_prog_run, 0, NULL); 102 return ret; 103 } 104 105 unsigned int __cgroup_bpf_run_lsm_socket(const void *ctx, 106 const struct bpf_insn *insn) 107 { 108 const struct bpf_prog *shim_prog; 109 struct socket *sock; 110 struct cgroup *cgrp; 111 int ret = 0; 112 u64 *args; 113 114 args = (u64 *)ctx; 115 sock = (void *)(unsigned long)args[0]; 116 /*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/ 117 shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi)); 118 119 cgrp = 
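		/* cgroup membership is tracked on the owning struct sock */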
sock_cgroup_ptr(&sock->sk->sk_cgrp_data); 120 if (likely(cgrp)) 121 ret = bpf_prog_run_array_cg(&cgrp->bpf, 122 shim_prog->aux->cgroup_atype, 123 ctx, bpf_prog_run, 0, NULL); 124 return ret; 125 } 126 127 unsigned int __cgroup_bpf_run_lsm_current(const void *ctx, 128 const struct bpf_insn *insn) 129 { 130 const struct bpf_prog *shim_prog; 131 struct cgroup *cgrp; 132 int ret = 0; 133 134 /*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/ 135 shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi)); 136 137 /* We rely on trampoline's __bpf_prog_enter_lsm_cgroup to grab RCU read lock. */ 138 cgrp = task_dfl_cgroup(current); 139 if (likely(cgrp)) 140 ret = bpf_prog_run_array_cg(&cgrp->bpf, 141 shim_prog->aux->cgroup_atype, 142 ctx, bpf_prog_run, 0, NULL); 143 return ret; 144 } 145 146 #ifdef CONFIG_BPF_LSM 147 struct cgroup_lsm_atype { 148 u32 attach_btf_id; 149 int refcnt; 150 }; 151 152 static struct cgroup_lsm_atype cgroup_lsm_atype[CGROUP_LSM_NUM]; 153 154 static enum cgroup_bpf_attach_type 155 bpf_cgroup_atype_find(enum bpf_attach_type attach_type, u32 attach_btf_id) 156 { 157 int i; 158 159 lockdep_assert_held(&cgroup_mutex); 160 161 if (attach_type != BPF_LSM_CGROUP) 162 return to_cgroup_bpf_attach_type(attach_type); 163 164 for (i = 0; i < ARRAY_SIZE(cgroup_lsm_atype); i++) 165 if (cgroup_lsm_atype[i].attach_btf_id == attach_btf_id) 166 return CGROUP_LSM_START + i; 167 168 for (i = 0; i < ARRAY_SIZE(cgroup_lsm_atype); i++) 169 if (cgroup_lsm_atype[i].attach_btf_id == 0) 170 return CGROUP_LSM_START + i; 171 172 return -E2BIG; 173 174 } 175 176 void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype) 177 { 178 int i = cgroup_atype - CGROUP_LSM_START; 179 180 lockdep_assert_held(&cgroup_mutex); 181 182 WARN_ON_ONCE(cgroup_lsm_atype[i].attach_btf_id && 183 cgroup_lsm_atype[i].attach_btf_id != attach_btf_id); 184 185 cgroup_lsm_atype[i].attach_btf_id = attach_btf_id; 186 cgroup_lsm_atype[i].refcnt++; 187 } 188 189 void bpf_cgroup_atype_put(int cgroup_atype) 190 { 191 int i = cgroup_atype - CGROUP_LSM_START; 192 193 cgroup_lock(); 194 if (--cgroup_lsm_atype[i].refcnt <= 0) 195 cgroup_lsm_atype[i].attach_btf_id = 0; 196 WARN_ON_ONCE(cgroup_lsm_atype[i].refcnt < 0); 197 cgroup_unlock(); 198 } 199 #else 200 static enum cgroup_bpf_attach_type 201 bpf_cgroup_atype_find(enum bpf_attach_type attach_type, u32 attach_btf_id) 202 { 203 if (attach_type != BPF_LSM_CGROUP) 204 return to_cgroup_bpf_attach_type(attach_type); 205 return -EOPNOTSUPP; 206 } 207 #endif /* CONFIG_BPF_LSM */ 208 209 void cgroup_bpf_offline(struct cgroup *cgrp) 210 { 211 cgroup_get(cgrp); 212 percpu_ref_kill(&cgrp->bpf.refcnt); 213 } 214 215 static void bpf_cgroup_storages_free(struct bpf_cgroup_storage *storages[]) 216 { 217 enum bpf_cgroup_storage_type stype; 218 219 for_each_cgroup_storage_type(stype) 220 bpf_cgroup_storage_free(storages[stype]); 221 } 222 223 static int bpf_cgroup_storages_alloc(struct bpf_cgroup_storage *storages[], 224 struct bpf_cgroup_storage *new_storages[], 225 enum bpf_attach_type type, 226 struct bpf_prog *prog, 227 struct cgroup *cgrp) 228 { 229 enum bpf_cgroup_storage_type stype; 230 struct bpf_cgroup_storage_key key; 231 struct bpf_map *map; 232 233 key.cgroup_inode_id = cgroup_id(cgrp); 234 key.attach_type = type; 235 236 for_each_cgroup_storage_type(stype) { 237 map = prog->aux->cgroup_storage[stype]; 238 if (!map) 239 continue; 240 241 storages[stype] = cgroup_storage_lookup((void *)map, &key, false); 242 if (storages[stype]) 243 continue; 244 245 
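		/* No storage exists yet for this (cgroup, attach_type) pair:
		 * allocate one and remember it in new_storages so it can be
		 * freed if a later allocation fails.
		 */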
storages[stype] = bpf_cgroup_storage_alloc(prog, stype); 246 if (IS_ERR(storages[stype])) { 247 bpf_cgroup_storages_free(new_storages); 248 return -ENOMEM; 249 } 250 251 new_storages[stype] = storages[stype]; 252 } 253 254 return 0; 255 } 256 257 static void bpf_cgroup_storages_assign(struct bpf_cgroup_storage *dst[], 258 struct bpf_cgroup_storage *src[]) 259 { 260 enum bpf_cgroup_storage_type stype; 261 262 for_each_cgroup_storage_type(stype) 263 dst[stype] = src[stype]; 264 } 265 266 static void bpf_cgroup_storages_link(struct bpf_cgroup_storage *storages[], 267 struct cgroup *cgrp, 268 enum bpf_attach_type attach_type) 269 { 270 enum bpf_cgroup_storage_type stype; 271 272 for_each_cgroup_storage_type(stype) 273 bpf_cgroup_storage_link(storages[stype], cgrp, attach_type); 274 } 275 276 /* Called when bpf_cgroup_link is auto-detached from dying cgroup. 277 * It drops cgroup and bpf_prog refcounts, and marks bpf_link as defunct. It 278 * doesn't free link memory, which will eventually be done by bpf_link's 279 * release() callback, when its last FD is closed. 280 */ 281 static void bpf_cgroup_link_auto_detach(struct bpf_cgroup_link *link) 282 { 283 cgroup_put(link->cgroup); 284 link->cgroup = NULL; 285 } 286 287 /** 288 * cgroup_bpf_release() - put references of all bpf programs and 289 * release all cgroup bpf data 290 * @work: work structure embedded into the cgroup to modify 291 */ 292 static void cgroup_bpf_release(struct work_struct *work) 293 { 294 struct cgroup *p, *cgrp = container_of(work, struct cgroup, 295 bpf.release_work); 296 struct bpf_prog_array *old_array; 297 struct list_head *storages = &cgrp->bpf.storages; 298 struct bpf_cgroup_storage *storage, *stmp; 299 300 unsigned int atype; 301 302 cgroup_lock(); 303 304 for (atype = 0; atype < ARRAY_SIZE(cgrp->bpf.progs); atype++) { 305 struct hlist_head *progs = &cgrp->bpf.progs[atype]; 306 struct bpf_prog_list *pl; 307 struct hlist_node *pltmp; 308 309 hlist_for_each_entry_safe(pl, pltmp, progs, node) { 310 hlist_del(&pl->node); 311 if (pl->prog) { 312 if (pl->prog->expected_attach_type == BPF_LSM_CGROUP) 313 bpf_trampoline_unlink_cgroup_shim(pl->prog); 314 bpf_prog_put(pl->prog); 315 } 316 if (pl->link) { 317 if (pl->link->link.prog->expected_attach_type == BPF_LSM_CGROUP) 318 bpf_trampoline_unlink_cgroup_shim(pl->link->link.prog); 319 bpf_cgroup_link_auto_detach(pl->link); 320 } 321 kfree(pl); 322 static_branch_dec(&cgroup_bpf_enabled_key[atype]); 323 } 324 old_array = rcu_dereference_protected( 325 cgrp->bpf.effective[atype], 326 lockdep_is_held(&cgroup_mutex)); 327 bpf_prog_array_free(old_array); 328 } 329 330 list_for_each_entry_safe(storage, stmp, storages, list_cg) { 331 bpf_cgroup_storage_unlink(storage); 332 bpf_cgroup_storage_free(storage); 333 } 334 335 cgroup_unlock(); 336 337 for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p)) 338 cgroup_bpf_put(p); 339 340 percpu_ref_exit(&cgrp->bpf.refcnt); 341 cgroup_put(cgrp); 342 } 343 344 /** 345 * cgroup_bpf_release_fn() - callback used to schedule releasing 346 * of bpf cgroup data 347 * @ref: percpu ref counter structure 348 */ 349 static void cgroup_bpf_release_fn(struct percpu_ref *ref) 350 { 351 struct cgroup *cgrp = container_of(ref, struct cgroup, bpf.refcnt); 352 353 INIT_WORK(&cgrp->bpf.release_work, cgroup_bpf_release); 354 queue_work(cgroup_bpf_destroy_wq, &cgrp->bpf.release_work); 355 } 356 357 /* Get underlying bpf_prog of bpf_prog_list entry, regardless if it's through 358 * link or direct prog. 
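 * Returns NULL if the entry has been marked deleted (both prog and link cleared).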
 */
static struct bpf_prog *prog_list_prog(struct bpf_prog_list *pl)
{
	if (pl->prog)
		return pl->prog;
	if (pl->link)
		return pl->link->link.prog;
	return NULL;
}

/* count number of elements in the list.
 * it's slow but the list cannot be long
 */
static u32 prog_list_length(struct hlist_head *head)
{
	struct bpf_prog_list *pl;
	u32 cnt = 0;

	hlist_for_each_entry(pl, head, node) {
		if (!prog_list_prog(pl))
			continue;
		cnt++;
	}
	return cnt;
}

/* if parent has non-overridable prog attached,
 * disallow attaching new programs to the descendant cgroup.
 * if parent has overridable or multi-prog, allow attaching
 */
static bool hierarchy_allows_attach(struct cgroup *cgrp,
				    enum cgroup_bpf_attach_type atype)
{
	struct cgroup *p;

	p = cgroup_parent(cgrp);
	if (!p)
		return true;
	do {
		u32 flags = p->bpf.flags[atype];
		u32 cnt;

		if (flags & BPF_F_ALLOW_MULTI)
			return true;
		cnt = prog_list_length(&p->bpf.progs[atype]);
		WARN_ON_ONCE(cnt > 1);
		if (cnt == 1)
			return !!(flags & BPF_F_ALLOW_OVERRIDE);
		p = cgroup_parent(p);
	} while (p);
	return true;
}

/* compute a chain of effective programs for a given cgroup:
 * start from the list of programs in this cgroup and add
 * all parent programs.
 * Note that parent's F_ALLOW_OVERRIDE-type program is yielding
 * to programs in this cgroup
 */
static int compute_effective_progs(struct cgroup *cgrp,
				   enum cgroup_bpf_attach_type atype,
				   struct bpf_prog_array **array)
{
	struct bpf_prog_array_item *item;
	struct bpf_prog_array *progs;
	struct bpf_prog_list *pl;
	struct cgroup *p = cgrp;
	int cnt = 0;

	/* count number of effective programs by walking parents */
	do {
		if (cnt == 0 || (p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
			cnt += prog_list_length(&p->bpf.progs[atype]);
		p = cgroup_parent(p);
	} while (p);

	progs = bpf_prog_array_alloc(cnt, GFP_KERNEL);
	if (!progs)
		return -ENOMEM;

	/* populate the array with effective progs */
	cnt = 0;
	p = cgrp;
	do {
		if (cnt > 0 && !(p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
			continue;

		hlist_for_each_entry(pl, &p->bpf.progs[atype], node) {
			if (!prog_list_prog(pl))
				continue;

			item = &progs->items[cnt];
			item->prog = prog_list_prog(pl);
			bpf_cgroup_storages_assign(item->cgroup_storage,
						   pl->storage);
			cnt++;
		}
	} while ((p = cgroup_parent(p)));

	*array = progs;
	return 0;
}

static void activate_effective_progs(struct cgroup *cgrp,
				     enum cgroup_bpf_attach_type atype,
				     struct bpf_prog_array *old_array)
{
	old_array = rcu_replace_pointer(cgrp->bpf.effective[atype], old_array,
					lockdep_is_held(&cgroup_mutex));
	/* free prog array after grace period, since __cgroup_bpf_run_*()
	 * might be still walking the array
	 */
	bpf_prog_array_free(old_array);
}

/**
 * cgroup_bpf_inherit() - inherit effective programs from parent
 * @cgrp: the cgroup to modify
 */
int cgroup_bpf_inherit(struct cgroup *cgrp)
{
/* has to use macro instead of const int, since compiler thinks
 * that array below is variable length
 */
#define NR ARRAY_SIZE(cgrp->bpf.effective)
	struct bpf_prog_array *arrays[NR] = {};
	struct cgroup *p;
	int ret, i;

	ret = percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn, 0,
GFP_KERNEL); 490 if (ret) 491 return ret; 492 493 for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p)) 494 cgroup_bpf_get(p); 495 496 for (i = 0; i < NR; i++) 497 INIT_HLIST_HEAD(&cgrp->bpf.progs[i]); 498 499 INIT_LIST_HEAD(&cgrp->bpf.storages); 500 501 for (i = 0; i < NR; i++) 502 if (compute_effective_progs(cgrp, i, &arrays[i])) 503 goto cleanup; 504 505 for (i = 0; i < NR; i++) 506 activate_effective_progs(cgrp, i, arrays[i]); 507 508 return 0; 509 cleanup: 510 for (i = 0; i < NR; i++) 511 bpf_prog_array_free(arrays[i]); 512 513 for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p)) 514 cgroup_bpf_put(p); 515 516 percpu_ref_exit(&cgrp->bpf.refcnt); 517 518 return -ENOMEM; 519 } 520 521 static int update_effective_progs(struct cgroup *cgrp, 522 enum cgroup_bpf_attach_type atype) 523 { 524 struct cgroup_subsys_state *css; 525 int err; 526 527 /* allocate and recompute effective prog arrays */ 528 css_for_each_descendant_pre(css, &cgrp->self) { 529 struct cgroup *desc = container_of(css, struct cgroup, self); 530 531 if (percpu_ref_is_zero(&desc->bpf.refcnt)) 532 continue; 533 534 err = compute_effective_progs(desc, atype, &desc->bpf.inactive); 535 if (err) 536 goto cleanup; 537 } 538 539 /* all allocations were successful. Activate all prog arrays */ 540 css_for_each_descendant_pre(css, &cgrp->self) { 541 struct cgroup *desc = container_of(css, struct cgroup, self); 542 543 if (percpu_ref_is_zero(&desc->bpf.refcnt)) { 544 if (unlikely(desc->bpf.inactive)) { 545 bpf_prog_array_free(desc->bpf.inactive); 546 desc->bpf.inactive = NULL; 547 } 548 continue; 549 } 550 551 activate_effective_progs(desc, atype, desc->bpf.inactive); 552 desc->bpf.inactive = NULL; 553 } 554 555 return 0; 556 557 cleanup: 558 /* oom while computing effective. Free all computed effective arrays 559 * since they were not activated 560 */ 561 css_for_each_descendant_pre(css, &cgrp->self) { 562 struct cgroup *desc = container_of(css, struct cgroup, self); 563 564 bpf_prog_array_free(desc->bpf.inactive); 565 desc->bpf.inactive = NULL; 566 } 567 568 return err; 569 } 570 571 #define BPF_CGROUP_MAX_PROGS 64 572 573 static struct bpf_prog_list *find_attach_entry(struct hlist_head *progs, 574 struct bpf_prog *prog, 575 struct bpf_cgroup_link *link, 576 struct bpf_prog *replace_prog, 577 bool allow_multi) 578 { 579 struct bpf_prog_list *pl; 580 581 /* single-attach case */ 582 if (!allow_multi) { 583 if (hlist_empty(progs)) 584 return NULL; 585 return hlist_entry(progs->first, typeof(*pl), node); 586 } 587 588 hlist_for_each_entry(pl, progs, node) { 589 if (prog && pl->prog == prog && prog != replace_prog) 590 /* disallow attaching the same prog twice */ 591 return ERR_PTR(-EINVAL); 592 if (link && pl->link == link) 593 /* disallow attaching the same link twice */ 594 return ERR_PTR(-EINVAL); 595 } 596 597 /* direct prog multi-attach w/ replacement case */ 598 if (replace_prog) { 599 hlist_for_each_entry(pl, progs, node) { 600 if (pl->prog == replace_prog) 601 /* a match found */ 602 return pl; 603 } 604 /* prog to replace not found for cgroup */ 605 return ERR_PTR(-ENOENT); 606 } 607 608 return NULL; 609 } 610 611 /** 612 * __cgroup_bpf_attach() - Attach the program or the link to a cgroup, and 613 * propagate the change to descendants 614 * @cgrp: The cgroup which descendants to traverse 615 * @prog: A program to attach 616 * @link: A link to attach 617 * @replace_prog: Previously attached program to replace if BPF_F_REPLACE is set 618 * @type: Type of attach operation 619 * @flags: Option flags 620 * 621 * Exactly one of 
@prog or @link can be non-null. 622 * Must be called with cgroup_mutex held. 623 */ 624 static int __cgroup_bpf_attach(struct cgroup *cgrp, 625 struct bpf_prog *prog, struct bpf_prog *replace_prog, 626 struct bpf_cgroup_link *link, 627 enum bpf_attach_type type, u32 flags) 628 { 629 u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)); 630 struct bpf_prog *old_prog = NULL; 631 struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {}; 632 struct bpf_cgroup_storage *new_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {}; 633 struct bpf_prog *new_prog = prog ? : link->link.prog; 634 enum cgroup_bpf_attach_type atype; 635 struct bpf_prog_list *pl; 636 struct hlist_head *progs; 637 int err; 638 639 if (((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) || 640 ((flags & BPF_F_REPLACE) && !(flags & BPF_F_ALLOW_MULTI))) 641 /* invalid combination */ 642 return -EINVAL; 643 if (link && (prog || replace_prog)) 644 /* only either link or prog/replace_prog can be specified */ 645 return -EINVAL; 646 if (!!replace_prog != !!(flags & BPF_F_REPLACE)) 647 /* replace_prog implies BPF_F_REPLACE, and vice versa */ 648 return -EINVAL; 649 650 atype = bpf_cgroup_atype_find(type, new_prog->aux->attach_btf_id); 651 if (atype < 0) 652 return -EINVAL; 653 654 progs = &cgrp->bpf.progs[atype]; 655 656 if (!hierarchy_allows_attach(cgrp, atype)) 657 return -EPERM; 658 659 if (!hlist_empty(progs) && cgrp->bpf.flags[atype] != saved_flags) 660 /* Disallow attaching non-overridable on top 661 * of existing overridable in this cgroup. 662 * Disallow attaching multi-prog if overridable or none 663 */ 664 return -EPERM; 665 666 if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS) 667 return -E2BIG; 668 669 pl = find_attach_entry(progs, prog, link, replace_prog, 670 flags & BPF_F_ALLOW_MULTI); 671 if (IS_ERR(pl)) 672 return PTR_ERR(pl); 673 674 if (bpf_cgroup_storages_alloc(storage, new_storage, type, 675 prog ? 
: link->link.prog, cgrp)) 676 return -ENOMEM; 677 678 if (pl) { 679 old_prog = pl->prog; 680 } else { 681 struct hlist_node *last = NULL; 682 683 pl = kmalloc(sizeof(*pl), GFP_KERNEL); 684 if (!pl) { 685 bpf_cgroup_storages_free(new_storage); 686 return -ENOMEM; 687 } 688 if (hlist_empty(progs)) 689 hlist_add_head(&pl->node, progs); 690 else 691 hlist_for_each(last, progs) { 692 if (last->next) 693 continue; 694 hlist_add_behind(&pl->node, last); 695 break; 696 } 697 } 698 699 pl->prog = prog; 700 pl->link = link; 701 bpf_cgroup_storages_assign(pl->storage, storage); 702 cgrp->bpf.flags[atype] = saved_flags; 703 704 if (type == BPF_LSM_CGROUP) { 705 err = bpf_trampoline_link_cgroup_shim(new_prog, atype); 706 if (err) 707 goto cleanup; 708 } 709 710 err = update_effective_progs(cgrp, atype); 711 if (err) 712 goto cleanup_trampoline; 713 714 if (old_prog) { 715 if (type == BPF_LSM_CGROUP) 716 bpf_trampoline_unlink_cgroup_shim(old_prog); 717 bpf_prog_put(old_prog); 718 } else { 719 static_branch_inc(&cgroup_bpf_enabled_key[atype]); 720 } 721 bpf_cgroup_storages_link(new_storage, cgrp, type); 722 return 0; 723 724 cleanup_trampoline: 725 if (type == BPF_LSM_CGROUP) 726 bpf_trampoline_unlink_cgroup_shim(new_prog); 727 728 cleanup: 729 if (old_prog) { 730 pl->prog = old_prog; 731 pl->link = NULL; 732 } 733 bpf_cgroup_storages_free(new_storage); 734 if (!old_prog) { 735 hlist_del(&pl->node); 736 kfree(pl); 737 } 738 return err; 739 } 740 741 static int cgroup_bpf_attach(struct cgroup *cgrp, 742 struct bpf_prog *prog, struct bpf_prog *replace_prog, 743 struct bpf_cgroup_link *link, 744 enum bpf_attach_type type, 745 u32 flags) 746 { 747 int ret; 748 749 cgroup_lock(); 750 ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags); 751 cgroup_unlock(); 752 return ret; 753 } 754 755 /* Swap updated BPF program for given link in effective program arrays across 756 * all descendant cgroups. This function is guaranteed to succeed. 757 */ 758 static void replace_effective_prog(struct cgroup *cgrp, 759 enum cgroup_bpf_attach_type atype, 760 struct bpf_cgroup_link *link) 761 { 762 struct bpf_prog_array_item *item; 763 struct cgroup_subsys_state *css; 764 struct bpf_prog_array *progs; 765 struct bpf_prog_list *pl; 766 struct hlist_head *head; 767 struct cgroup *cg; 768 int pos; 769 770 css_for_each_descendant_pre(css, &cgrp->self) { 771 struct cgroup *desc = container_of(css, struct cgroup, self); 772 773 if (percpu_ref_is_zero(&desc->bpf.refcnt)) 774 continue; 775 776 /* find position of link in effective progs array */ 777 for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) { 778 if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI)) 779 continue; 780 781 head = &cg->bpf.progs[atype]; 782 hlist_for_each_entry(pl, head, node) { 783 if (!prog_list_prog(pl)) 784 continue; 785 if (pl->link == link) 786 goto found; 787 pos++; 788 } 789 } 790 found: 791 BUG_ON(!cg); 792 progs = rcu_dereference_protected( 793 desc->bpf.effective[atype], 794 lockdep_is_held(&cgroup_mutex)); 795 item = &progs->items[pos]; 796 WRITE_ONCE(item->prog, link->link.prog); 797 } 798 } 799 800 /** 801 * __cgroup_bpf_replace() - Replace link's program and propagate the change 802 * to descendants 803 * @cgrp: The cgroup which descendants to traverse 804 * @link: A link for which to replace BPF program 805 * @new_prog: &struct bpf_prog for the target BPF program with its refcnt 806 * incremented 807 * 808 * Must be called with cgroup_mutex held. 
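 * Return: 0 on success, or a negative errno (e.g. -EINVAL, -ENOENT) on failure.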
809 */ 810 static int __cgroup_bpf_replace(struct cgroup *cgrp, 811 struct bpf_cgroup_link *link, 812 struct bpf_prog *new_prog) 813 { 814 enum cgroup_bpf_attach_type atype; 815 struct bpf_prog *old_prog; 816 struct bpf_prog_list *pl; 817 struct hlist_head *progs; 818 bool found = false; 819 820 atype = bpf_cgroup_atype_find(link->type, new_prog->aux->attach_btf_id); 821 if (atype < 0) 822 return -EINVAL; 823 824 progs = &cgrp->bpf.progs[atype]; 825 826 if (link->link.prog->type != new_prog->type) 827 return -EINVAL; 828 829 hlist_for_each_entry(pl, progs, node) { 830 if (pl->link == link) { 831 found = true; 832 break; 833 } 834 } 835 if (!found) 836 return -ENOENT; 837 838 old_prog = xchg(&link->link.prog, new_prog); 839 replace_effective_prog(cgrp, atype, link); 840 bpf_prog_put(old_prog); 841 return 0; 842 } 843 844 static int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *new_prog, 845 struct bpf_prog *old_prog) 846 { 847 struct bpf_cgroup_link *cg_link; 848 int ret; 849 850 cg_link = container_of(link, struct bpf_cgroup_link, link); 851 852 cgroup_lock(); 853 /* link might have been auto-released by dying cgroup, so fail */ 854 if (!cg_link->cgroup) { 855 ret = -ENOLINK; 856 goto out_unlock; 857 } 858 if (old_prog && link->prog != old_prog) { 859 ret = -EPERM; 860 goto out_unlock; 861 } 862 ret = __cgroup_bpf_replace(cg_link->cgroup, cg_link, new_prog); 863 out_unlock: 864 cgroup_unlock(); 865 return ret; 866 } 867 868 static struct bpf_prog_list *find_detach_entry(struct hlist_head *progs, 869 struct bpf_prog *prog, 870 struct bpf_cgroup_link *link, 871 bool allow_multi) 872 { 873 struct bpf_prog_list *pl; 874 875 if (!allow_multi) { 876 if (hlist_empty(progs)) 877 /* report error when trying to detach and nothing is attached */ 878 return ERR_PTR(-ENOENT); 879 880 /* to maintain backward compatibility NONE and OVERRIDE cgroups 881 * allow detaching with invalid FD (prog==NULL) in legacy mode 882 */ 883 return hlist_entry(progs->first, typeof(*pl), node); 884 } 885 886 if (!prog && !link) 887 /* to detach MULTI prog the user has to specify valid FD 888 * of the program or link to be detached 889 */ 890 return ERR_PTR(-EINVAL); 891 892 /* find the prog or link and detach it */ 893 hlist_for_each_entry(pl, progs, node) { 894 if (pl->prog == prog && pl->link == link) 895 return pl; 896 } 897 return ERR_PTR(-ENOENT); 898 } 899 900 /** 901 * purge_effective_progs() - After compute_effective_progs fails to alloc new 902 * cgrp->bpf.inactive table we can recover by 903 * recomputing the array in place. 
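 * Unlike update_effective_progs() this path never allocates and thus cannot fail.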
 *
 * @cgrp: The cgroup which descendants to traverse
 * @prog: A program to detach or NULL
 * @link: A link to detach or NULL
 * @atype: Type of detach operation
 */
static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog,
				  struct bpf_cgroup_link *link,
				  enum cgroup_bpf_attach_type atype)
{
	struct cgroup_subsys_state *css;
	struct bpf_prog_array *progs;
	struct bpf_prog_list *pl;
	struct hlist_head *head;
	struct cgroup *cg;
	int pos;

	/* recompute effective prog array in place */
	css_for_each_descendant_pre(css, &cgrp->self) {
		struct cgroup *desc = container_of(css, struct cgroup, self);

		if (percpu_ref_is_zero(&desc->bpf.refcnt))
			continue;

		/* find position of link or prog in effective progs array */
		for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) {
			if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
				continue;

			head = &cg->bpf.progs[atype];
			hlist_for_each_entry(pl, head, node) {
				if (!prog_list_prog(pl))
					continue;
				if (pl->prog == prog && pl->link == link)
					goto found;
				pos++;
			}
		}

		/* no link or prog match, skip the cgroup of this layer */
		continue;
found:
		progs = rcu_dereference_protected(
				desc->bpf.effective[atype],
				lockdep_is_held(&cgroup_mutex));

		/* Remove the program from the array */
		WARN_ONCE(bpf_prog_array_delete_safe_at(progs, pos),
			  "Failed to purge a prog from array at index %d", pos);
	}
}

/**
 * __cgroup_bpf_detach() - Detach the program or link from a cgroup, and
 *                         propagate the change to descendants
 * @cgrp: The cgroup which descendants to traverse
 * @prog: A program to detach or NULL
 * @link: A link to detach or NULL
 * @type: Type of detach operation
 *
 * At most one of @prog or @link can be non-NULL.
 * Must be called with cgroup_mutex held.
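 * Return: 0 on success, or a negative errno (e.g. -EINVAL, -ENOENT) on failure.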
966 */ 967 static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, 968 struct bpf_cgroup_link *link, enum bpf_attach_type type) 969 { 970 enum cgroup_bpf_attach_type atype; 971 struct bpf_prog *old_prog; 972 struct bpf_prog_list *pl; 973 struct hlist_head *progs; 974 u32 attach_btf_id = 0; 975 u32 flags; 976 977 if (prog) 978 attach_btf_id = prog->aux->attach_btf_id; 979 if (link) 980 attach_btf_id = link->link.prog->aux->attach_btf_id; 981 982 atype = bpf_cgroup_atype_find(type, attach_btf_id); 983 if (atype < 0) 984 return -EINVAL; 985 986 progs = &cgrp->bpf.progs[atype]; 987 flags = cgrp->bpf.flags[atype]; 988 989 if (prog && link) 990 /* only one of prog or link can be specified */ 991 return -EINVAL; 992 993 pl = find_detach_entry(progs, prog, link, flags & BPF_F_ALLOW_MULTI); 994 if (IS_ERR(pl)) 995 return PTR_ERR(pl); 996 997 /* mark it deleted, so it's ignored while recomputing effective */ 998 old_prog = pl->prog; 999 pl->prog = NULL; 1000 pl->link = NULL; 1001 1002 if (update_effective_progs(cgrp, atype)) { 1003 /* if update effective array failed replace the prog with a dummy prog*/ 1004 pl->prog = old_prog; 1005 pl->link = link; 1006 purge_effective_progs(cgrp, old_prog, link, atype); 1007 } 1008 1009 /* now can actually delete it from this cgroup list */ 1010 hlist_del(&pl->node); 1011 1012 kfree(pl); 1013 if (hlist_empty(progs)) 1014 /* last program was detached, reset flags to zero */ 1015 cgrp->bpf.flags[atype] = 0; 1016 if (old_prog) { 1017 if (type == BPF_LSM_CGROUP) 1018 bpf_trampoline_unlink_cgroup_shim(old_prog); 1019 bpf_prog_put(old_prog); 1020 } 1021 static_branch_dec(&cgroup_bpf_enabled_key[atype]); 1022 return 0; 1023 } 1024 1025 static int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, 1026 enum bpf_attach_type type) 1027 { 1028 int ret; 1029 1030 cgroup_lock(); 1031 ret = __cgroup_bpf_detach(cgrp, prog, NULL, type); 1032 cgroup_unlock(); 1033 return ret; 1034 } 1035 1036 /* Must be called with cgroup_mutex held to avoid races. 
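 * The effective arrays are dereferenced with rcu_dereference_protected()
 * under this mutex, not under RCU.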
*/ 1037 static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, 1038 union bpf_attr __user *uattr) 1039 { 1040 __u32 __user *prog_attach_flags = u64_to_user_ptr(attr->query.prog_attach_flags); 1041 bool effective_query = attr->query.query_flags & BPF_F_QUERY_EFFECTIVE; 1042 __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids); 1043 enum bpf_attach_type type = attr->query.attach_type; 1044 enum cgroup_bpf_attach_type from_atype, to_atype; 1045 enum cgroup_bpf_attach_type atype; 1046 struct bpf_prog_array *effective; 1047 int cnt, ret = 0, i; 1048 int total_cnt = 0; 1049 u32 flags; 1050 1051 if (effective_query && prog_attach_flags) 1052 return -EINVAL; 1053 1054 if (type == BPF_LSM_CGROUP) { 1055 if (!effective_query && attr->query.prog_cnt && 1056 prog_ids && !prog_attach_flags) 1057 return -EINVAL; 1058 1059 from_atype = CGROUP_LSM_START; 1060 to_atype = CGROUP_LSM_END; 1061 flags = 0; 1062 } else { 1063 from_atype = to_cgroup_bpf_attach_type(type); 1064 if (from_atype < 0) 1065 return -EINVAL; 1066 to_atype = from_atype; 1067 flags = cgrp->bpf.flags[from_atype]; 1068 } 1069 1070 for (atype = from_atype; atype <= to_atype; atype++) { 1071 if (effective_query) { 1072 effective = rcu_dereference_protected(cgrp->bpf.effective[atype], 1073 lockdep_is_held(&cgroup_mutex)); 1074 total_cnt += bpf_prog_array_length(effective); 1075 } else { 1076 total_cnt += prog_list_length(&cgrp->bpf.progs[atype]); 1077 } 1078 } 1079 1080 /* always output uattr->query.attach_flags as 0 during effective query */ 1081 flags = effective_query ? 0 : flags; 1082 if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags))) 1083 return -EFAULT; 1084 if (copy_to_user(&uattr->query.prog_cnt, &total_cnt, sizeof(total_cnt))) 1085 return -EFAULT; 1086 if (attr->query.prog_cnt == 0 || !prog_ids || !total_cnt) 1087 /* return early if user requested only program count + flags */ 1088 return 0; 1089 1090 if (attr->query.prog_cnt < total_cnt) { 1091 total_cnt = attr->query.prog_cnt; 1092 ret = -ENOSPC; 1093 } 1094 1095 for (atype = from_atype; atype <= to_atype && total_cnt; atype++) { 1096 if (effective_query) { 1097 effective = rcu_dereference_protected(cgrp->bpf.effective[atype], 1098 lockdep_is_held(&cgroup_mutex)); 1099 cnt = min_t(int, bpf_prog_array_length(effective), total_cnt); 1100 ret = bpf_prog_array_copy_to_user(effective, prog_ids, cnt); 1101 } else { 1102 struct hlist_head *progs; 1103 struct bpf_prog_list *pl; 1104 struct bpf_prog *prog; 1105 u32 id; 1106 1107 progs = &cgrp->bpf.progs[atype]; 1108 cnt = min_t(int, prog_list_length(progs), total_cnt); 1109 i = 0; 1110 hlist_for_each_entry(pl, progs, node) { 1111 prog = prog_list_prog(pl); 1112 id = prog->aux->id; 1113 if (copy_to_user(prog_ids + i, &id, sizeof(id))) 1114 return -EFAULT; 1115 if (++i == cnt) 1116 break; 1117 } 1118 1119 if (prog_attach_flags) { 1120 flags = cgrp->bpf.flags[atype]; 1121 1122 for (i = 0; i < cnt; i++) 1123 if (copy_to_user(prog_attach_flags + i, 1124 &flags, sizeof(flags))) 1125 return -EFAULT; 1126 prog_attach_flags += cnt; 1127 } 1128 } 1129 1130 prog_ids += cnt; 1131 total_cnt -= cnt; 1132 } 1133 return ret; 1134 } 1135 1136 static int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, 1137 union bpf_attr __user *uattr) 1138 { 1139 int ret; 1140 1141 cgroup_lock(); 1142 ret = __cgroup_bpf_query(cgrp, attr, uattr); 1143 cgroup_unlock(); 1144 return ret; 1145 } 1146 1147 int cgroup_bpf_prog_attach(const union bpf_attr *attr, 1148 enum bpf_prog_type ptype, struct bpf_prog 
*prog) 1149 { 1150 struct bpf_prog *replace_prog = NULL; 1151 struct cgroup *cgrp; 1152 int ret; 1153 1154 cgrp = cgroup_get_from_fd(attr->target_fd); 1155 if (IS_ERR(cgrp)) 1156 return PTR_ERR(cgrp); 1157 1158 if ((attr->attach_flags & BPF_F_ALLOW_MULTI) && 1159 (attr->attach_flags & BPF_F_REPLACE)) { 1160 replace_prog = bpf_prog_get_type(attr->replace_bpf_fd, ptype); 1161 if (IS_ERR(replace_prog)) { 1162 cgroup_put(cgrp); 1163 return PTR_ERR(replace_prog); 1164 } 1165 } 1166 1167 ret = cgroup_bpf_attach(cgrp, prog, replace_prog, NULL, 1168 attr->attach_type, attr->attach_flags); 1169 1170 if (replace_prog) 1171 bpf_prog_put(replace_prog); 1172 cgroup_put(cgrp); 1173 return ret; 1174 } 1175 1176 int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) 1177 { 1178 struct bpf_prog *prog; 1179 struct cgroup *cgrp; 1180 int ret; 1181 1182 cgrp = cgroup_get_from_fd(attr->target_fd); 1183 if (IS_ERR(cgrp)) 1184 return PTR_ERR(cgrp); 1185 1186 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); 1187 if (IS_ERR(prog)) 1188 prog = NULL; 1189 1190 ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type); 1191 if (prog) 1192 bpf_prog_put(prog); 1193 1194 cgroup_put(cgrp); 1195 return ret; 1196 } 1197 1198 static void bpf_cgroup_link_release(struct bpf_link *link) 1199 { 1200 struct bpf_cgroup_link *cg_link = 1201 container_of(link, struct bpf_cgroup_link, link); 1202 struct cgroup *cg; 1203 1204 /* link might have been auto-detached by dying cgroup already, 1205 * in that case our work is done here 1206 */ 1207 if (!cg_link->cgroup) 1208 return; 1209 1210 cgroup_lock(); 1211 1212 /* re-check cgroup under lock again */ 1213 if (!cg_link->cgroup) { 1214 cgroup_unlock(); 1215 return; 1216 } 1217 1218 WARN_ON(__cgroup_bpf_detach(cg_link->cgroup, NULL, cg_link, 1219 cg_link->type)); 1220 if (cg_link->type == BPF_LSM_CGROUP) 1221 bpf_trampoline_unlink_cgroup_shim(cg_link->link.prog); 1222 1223 cg = cg_link->cgroup; 1224 cg_link->cgroup = NULL; 1225 1226 cgroup_unlock(); 1227 1228 cgroup_put(cg); 1229 } 1230 1231 static void bpf_cgroup_link_dealloc(struct bpf_link *link) 1232 { 1233 struct bpf_cgroup_link *cg_link = 1234 container_of(link, struct bpf_cgroup_link, link); 1235 1236 kfree(cg_link); 1237 } 1238 1239 static int bpf_cgroup_link_detach(struct bpf_link *link) 1240 { 1241 bpf_cgroup_link_release(link); 1242 1243 return 0; 1244 } 1245 1246 static void bpf_cgroup_link_show_fdinfo(const struct bpf_link *link, 1247 struct seq_file *seq) 1248 { 1249 struct bpf_cgroup_link *cg_link = 1250 container_of(link, struct bpf_cgroup_link, link); 1251 u64 cg_id = 0; 1252 1253 cgroup_lock(); 1254 if (cg_link->cgroup) 1255 cg_id = cgroup_id(cg_link->cgroup); 1256 cgroup_unlock(); 1257 1258 seq_printf(seq, 1259 "cgroup_id:\t%llu\n" 1260 "attach_type:\t%d\n", 1261 cg_id, 1262 cg_link->type); 1263 } 1264 1265 static int bpf_cgroup_link_fill_link_info(const struct bpf_link *link, 1266 struct bpf_link_info *info) 1267 { 1268 struct bpf_cgroup_link *cg_link = 1269 container_of(link, struct bpf_cgroup_link, link); 1270 u64 cg_id = 0; 1271 1272 cgroup_lock(); 1273 if (cg_link->cgroup) 1274 cg_id = cgroup_id(cg_link->cgroup); 1275 cgroup_unlock(); 1276 1277 info->cgroup.cgroup_id = cg_id; 1278 info->cgroup.attach_type = cg_link->type; 1279 return 0; 1280 } 1281 1282 static const struct bpf_link_ops bpf_cgroup_link_lops = { 1283 .release = bpf_cgroup_link_release, 1284 .dealloc = bpf_cgroup_link_dealloc, 1285 .detach = bpf_cgroup_link_detach, 1286 .update_prog = cgroup_bpf_replace, 1287 .show_fdinfo = 
bpf_cgroup_link_show_fdinfo, 1288 .fill_link_info = bpf_cgroup_link_fill_link_info, 1289 }; 1290 1291 int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 1292 { 1293 struct bpf_link_primer link_primer; 1294 struct bpf_cgroup_link *link; 1295 struct cgroup *cgrp; 1296 int err; 1297 1298 if (attr->link_create.flags) 1299 return -EINVAL; 1300 1301 cgrp = cgroup_get_from_fd(attr->link_create.target_fd); 1302 if (IS_ERR(cgrp)) 1303 return PTR_ERR(cgrp); 1304 1305 link = kzalloc(sizeof(*link), GFP_USER); 1306 if (!link) { 1307 err = -ENOMEM; 1308 goto out_put_cgroup; 1309 } 1310 bpf_link_init(&link->link, BPF_LINK_TYPE_CGROUP, &bpf_cgroup_link_lops, 1311 prog); 1312 link->cgroup = cgrp; 1313 link->type = attr->link_create.attach_type; 1314 1315 err = bpf_link_prime(&link->link, &link_primer); 1316 if (err) { 1317 kfree(link); 1318 goto out_put_cgroup; 1319 } 1320 1321 err = cgroup_bpf_attach(cgrp, NULL, NULL, link, 1322 link->type, BPF_F_ALLOW_MULTI); 1323 if (err) { 1324 bpf_link_cleanup(&link_primer); 1325 goto out_put_cgroup; 1326 } 1327 1328 return bpf_link_settle(&link_primer); 1329 1330 out_put_cgroup: 1331 cgroup_put(cgrp); 1332 return err; 1333 } 1334 1335 int cgroup_bpf_prog_query(const union bpf_attr *attr, 1336 union bpf_attr __user *uattr) 1337 { 1338 struct cgroup *cgrp; 1339 int ret; 1340 1341 cgrp = cgroup_get_from_fd(attr->query.target_fd); 1342 if (IS_ERR(cgrp)) 1343 return PTR_ERR(cgrp); 1344 1345 ret = cgroup_bpf_query(cgrp, attr, uattr); 1346 1347 cgroup_put(cgrp); 1348 return ret; 1349 } 1350 1351 /** 1352 * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering 1353 * @sk: The socket sending or receiving traffic 1354 * @skb: The skb that is being sent or received 1355 * @atype: The type of program to be executed 1356 * 1357 * If no socket is passed, or the socket is not of type INET or INET6, 1358 * this function does nothing and returns 0. 1359 * 1360 * The program type passed in via @type must be suitable for network 1361 * filtering. No further check is performed to assert that. 1362 * 1363 * For egress packets, this function can return: 1364 * NET_XMIT_SUCCESS (0) - continue with packet output 1365 * NET_XMIT_DROP (1) - drop packet and notify TCP to call cwr 1366 * NET_XMIT_CN (2) - continue with packet output and notify TCP 1367 * to call cwr 1368 * -err - drop packet 1369 * 1370 * For ingress packets, this function will return -EPERM if any 1371 * attached program was found and if it returned != 1 during execution. 1372 * Otherwise 0 is returned. 
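 * While the programs run, skb->sk is temporarily set to @sk, skb->data is
 * pushed back to the network header, and the data_end pointer seen by the
 * programs is saved and restored around the run.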
1373 */ 1374 int __cgroup_bpf_run_filter_skb(struct sock *sk, 1375 struct sk_buff *skb, 1376 enum cgroup_bpf_attach_type atype) 1377 { 1378 unsigned int offset = skb->data - skb_network_header(skb); 1379 struct sock *save_sk; 1380 void *saved_data_end; 1381 struct cgroup *cgrp; 1382 int ret; 1383 1384 if (!sk || !sk_fullsock(sk)) 1385 return 0; 1386 1387 if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6) 1388 return 0; 1389 1390 cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); 1391 save_sk = skb->sk; 1392 skb->sk = sk; 1393 __skb_push(skb, offset); 1394 1395 /* compute pointers for the bpf prog */ 1396 bpf_compute_and_save_data_end(skb, &saved_data_end); 1397 1398 if (atype == CGROUP_INET_EGRESS) { 1399 u32 flags = 0; 1400 bool cn; 1401 1402 ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, skb, 1403 __bpf_prog_run_save_cb, 0, &flags); 1404 1405 /* Return values of CGROUP EGRESS BPF programs are: 1406 * 0: drop packet 1407 * 1: keep packet 1408 * 2: drop packet and cn 1409 * 3: keep packet and cn 1410 * 1411 * The returned value is then converted to one of the NET_XMIT 1412 * or an error code that is then interpreted as drop packet 1413 * (and no cn): 1414 * 0: NET_XMIT_SUCCESS skb should be transmitted 1415 * 1: NET_XMIT_DROP skb should be dropped and cn 1416 * 2: NET_XMIT_CN skb should be transmitted and cn 1417 * 3: -err skb should be dropped 1418 */ 1419 1420 cn = flags & BPF_RET_SET_CN; 1421 if (ret && !IS_ERR_VALUE((long)ret)) 1422 ret = -EFAULT; 1423 if (!ret) 1424 ret = (cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); 1425 else 1426 ret = (cn ? NET_XMIT_DROP : ret); 1427 } else { 1428 ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, 1429 skb, __bpf_prog_run_save_cb, 0, 1430 NULL); 1431 if (ret && !IS_ERR_VALUE((long)ret)) 1432 ret = -EFAULT; 1433 } 1434 bpf_restore_data_end(skb, saved_data_end); 1435 __skb_pull(skb, offset); 1436 skb->sk = save_sk; 1437 1438 return ret; 1439 } 1440 EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb); 1441 1442 /** 1443 * __cgroup_bpf_run_filter_sk() - Run a program on a sock 1444 * @sk: sock structure to manipulate 1445 * @atype: The type of program to be executed 1446 * 1447 * socket is passed is expected to be of type INET or INET6. 1448 * 1449 * The program type passed in via @type must be suitable for sock 1450 * filtering. No further check is performed to assert that. 1451 * 1452 * This function will return %-EPERM if any if an attached program was found 1453 * and if it returned != 1 during execution. In all other cases, 0 is returned. 1454 */ 1455 int __cgroup_bpf_run_filter_sk(struct sock *sk, 1456 enum cgroup_bpf_attach_type atype) 1457 { 1458 struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); 1459 1460 return bpf_prog_run_array_cg(&cgrp->bpf, atype, sk, bpf_prog_run, 0, 1461 NULL); 1462 } 1463 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); 1464 1465 /** 1466 * __cgroup_bpf_run_filter_sock_addr() - Run a program on a sock and 1467 * provided by user sockaddr 1468 * @sk: sock struct that will use sockaddr 1469 * @uaddr: sockaddr struct provided by user 1470 * @uaddrlen: Pointer to the size of the sockaddr struct provided by user. It is 1471 * read-only for AF_INET[6] uaddr but can be modified for AF_UNIX 1472 * uaddr. 1473 * @atype: The type of program to be executed 1474 * @t_ctx: Pointer to attach type specific context 1475 * @flags: Pointer to u32 which contains higher bits of BPF program 1476 * return value (OR'ed together). 1477 * 1478 * socket is expected to be of type INET or INET6. 
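 * On success, and if @uaddr was supplied, the possibly updated address
 * length is written back through @uaddrlen.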
1479 * 1480 * This function will return %-EPERM if an attached program is found and 1481 * returned value != 1 during execution. In all other cases, 0 is returned. 1482 */ 1483 int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, 1484 struct sockaddr *uaddr, 1485 int *uaddrlen, 1486 enum cgroup_bpf_attach_type atype, 1487 void *t_ctx, 1488 u32 *flags) 1489 { 1490 struct bpf_sock_addr_kern ctx = { 1491 .sk = sk, 1492 .uaddr = uaddr, 1493 .t_ctx = t_ctx, 1494 }; 1495 struct sockaddr_storage unspec; 1496 struct cgroup *cgrp; 1497 int ret; 1498 1499 /* Check socket family since not all sockets represent network 1500 * endpoint (e.g. AF_UNIX). 1501 */ 1502 if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6) 1503 return 0; 1504 1505 if (!ctx.uaddr) { 1506 memset(&unspec, 0, sizeof(unspec)); 1507 ctx.uaddr = (struct sockaddr *)&unspec; 1508 ctx.uaddrlen = 0; 1509 } else { 1510 ctx.uaddrlen = *uaddrlen; 1511 } 1512 1513 cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); 1514 ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, 1515 0, flags); 1516 1517 if (!ret && uaddr) 1518 *uaddrlen = ctx.uaddrlen; 1519 1520 return ret; 1521 } 1522 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr); 1523 1524 /** 1525 * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock 1526 * @sk: socket to get cgroup from 1527 * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains 1528 * sk with connection information (IP addresses, etc.) May not contain 1529 * cgroup info if it is a req sock. 1530 * @atype: The type of program to be executed 1531 * 1532 * socket passed is expected to be of type INET or INET6. 1533 * 1534 * The program type passed in via @type must be suitable for sock_ops 1535 * filtering. No further check is performed to assert that. 1536 * 1537 * This function will return %-EPERM if any if an attached program was found 1538 * and if it returned != 1 during execution. In all other cases, 0 is returned. 1539 */ 1540 int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, 1541 struct bpf_sock_ops_kern *sock_ops, 1542 enum cgroup_bpf_attach_type atype) 1543 { 1544 struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); 1545 1546 return bpf_prog_run_array_cg(&cgrp->bpf, atype, sock_ops, bpf_prog_run, 1547 0, NULL); 1548 } 1549 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops); 1550 1551 int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, 1552 short access, enum cgroup_bpf_attach_type atype) 1553 { 1554 struct cgroup *cgrp; 1555 struct bpf_cgroup_dev_ctx ctx = { 1556 .access_type = (access << 16) | dev_type, 1557 .major = major, 1558 .minor = minor, 1559 }; 1560 int ret; 1561 1562 rcu_read_lock(); 1563 cgrp = task_dfl_cgroup(current); 1564 ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, 0, 1565 NULL); 1566 rcu_read_unlock(); 1567 1568 return ret; 1569 } 1570 1571 BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags) 1572 { 1573 /* flags argument is not used now, 1574 * but provides an ability to extend the API. 1575 * verifier checks that its value is correct. 
1576 */ 1577 enum bpf_cgroup_storage_type stype = cgroup_storage_type(map); 1578 struct bpf_cgroup_storage *storage; 1579 struct bpf_cg_run_ctx *ctx; 1580 void *ptr; 1581 1582 /* get current cgroup storage from BPF run context */ 1583 ctx = container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx); 1584 storage = ctx->prog_item->cgroup_storage[stype]; 1585 1586 if (stype == BPF_CGROUP_STORAGE_SHARED) 1587 ptr = &READ_ONCE(storage->buf)->data[0]; 1588 else 1589 ptr = this_cpu_ptr(storage->percpu_buf); 1590 1591 return (unsigned long)ptr; 1592 } 1593 1594 const struct bpf_func_proto bpf_get_local_storage_proto = { 1595 .func = bpf_get_local_storage, 1596 .gpl_only = false, 1597 .ret_type = RET_PTR_TO_MAP_VALUE, 1598 .arg1_type = ARG_CONST_MAP_PTR, 1599 .arg2_type = ARG_ANYTHING, 1600 }; 1601 1602 BPF_CALL_0(bpf_get_retval) 1603 { 1604 struct bpf_cg_run_ctx *ctx = 1605 container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx); 1606 1607 return ctx->retval; 1608 } 1609 1610 const struct bpf_func_proto bpf_get_retval_proto = { 1611 .func = bpf_get_retval, 1612 .gpl_only = false, 1613 .ret_type = RET_INTEGER, 1614 }; 1615 1616 BPF_CALL_1(bpf_set_retval, int, retval) 1617 { 1618 struct bpf_cg_run_ctx *ctx = 1619 container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx); 1620 1621 ctx->retval = retval; 1622 return 0; 1623 } 1624 1625 const struct bpf_func_proto bpf_set_retval_proto = { 1626 .func = bpf_set_retval, 1627 .gpl_only = false, 1628 .ret_type = RET_INTEGER, 1629 .arg1_type = ARG_ANYTHING, 1630 }; 1631 1632 static const struct bpf_func_proto * 1633 cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 1634 { 1635 const struct bpf_func_proto *func_proto; 1636 1637 func_proto = cgroup_common_func_proto(func_id, prog); 1638 if (func_proto) 1639 return func_proto; 1640 1641 func_proto = cgroup_current_func_proto(func_id, prog); 1642 if (func_proto) 1643 return func_proto; 1644 1645 switch (func_id) { 1646 case BPF_FUNC_perf_event_output: 1647 return &bpf_event_output_data_proto; 1648 default: 1649 return bpf_base_func_proto(func_id); 1650 } 1651 } 1652 1653 static bool cgroup_dev_is_valid_access(int off, int size, 1654 enum bpf_access_type type, 1655 const struct bpf_prog *prog, 1656 struct bpf_insn_access_aux *info) 1657 { 1658 const int size_default = sizeof(__u32); 1659 1660 if (type == BPF_WRITE) 1661 return false; 1662 1663 if (off < 0 || off + size > sizeof(struct bpf_cgroup_dev_ctx)) 1664 return false; 1665 /* The verifier guarantees that size > 0. 
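 * The off % size check below is therefore never a modulo by zero.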
*/ 1666 if (off % size != 0) 1667 return false; 1668 1669 switch (off) { 1670 case bpf_ctx_range(struct bpf_cgroup_dev_ctx, access_type): 1671 bpf_ctx_record_field_size(info, size_default); 1672 if (!bpf_ctx_narrow_access_ok(off, size, size_default)) 1673 return false; 1674 break; 1675 default: 1676 if (size != size_default) 1677 return false; 1678 } 1679 1680 return true; 1681 } 1682 1683 const struct bpf_prog_ops cg_dev_prog_ops = { 1684 }; 1685 1686 const struct bpf_verifier_ops cg_dev_verifier_ops = { 1687 .get_func_proto = cgroup_dev_func_proto, 1688 .is_valid_access = cgroup_dev_is_valid_access, 1689 }; 1690 1691 /** 1692 * __cgroup_bpf_run_filter_sysctl - Run a program on sysctl 1693 * 1694 * @head: sysctl table header 1695 * @table: sysctl table 1696 * @write: sysctl is being read (= 0) or written (= 1) 1697 * @buf: pointer to buffer (in and out) 1698 * @pcount: value-result argument: value is size of buffer pointed to by @buf, 1699 * result is size of @new_buf if program set new value, initial value 1700 * otherwise 1701 * @ppos: value-result argument: value is position at which read from or write 1702 * to sysctl is happening, result is new position if program overrode it, 1703 * initial value otherwise 1704 * @atype: type of program to be executed 1705 * 1706 * Program is run when sysctl is being accessed, either read or written, and 1707 * can allow or deny such access. 1708 * 1709 * This function will return %-EPERM if an attached program is found and 1710 * returned value != 1 during execution. In all other cases 0 is returned. 1711 */ 1712 int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, 1713 struct ctl_table *table, int write, 1714 char **buf, size_t *pcount, loff_t *ppos, 1715 enum cgroup_bpf_attach_type atype) 1716 { 1717 struct bpf_sysctl_kern ctx = { 1718 .head = head, 1719 .table = table, 1720 .write = write, 1721 .ppos = ppos, 1722 .cur_val = NULL, 1723 .cur_len = PAGE_SIZE, 1724 .new_val = NULL, 1725 .new_len = 0, 1726 .new_updated = 0, 1727 }; 1728 struct cgroup *cgrp; 1729 loff_t pos = 0; 1730 int ret; 1731 1732 ctx.cur_val = kmalloc_track_caller(ctx.cur_len, GFP_KERNEL); 1733 if (!ctx.cur_val || 1734 table->proc_handler(table, 0, ctx.cur_val, &ctx.cur_len, &pos)) { 1735 /* Let BPF program decide how to proceed. */ 1736 ctx.cur_len = 0; 1737 } 1738 1739 if (write && *buf && *pcount) { 1740 /* BPF program should be able to override new value with a 1741 * buffer bigger than provided by user. 1742 */ 1743 ctx.new_val = kmalloc_track_caller(PAGE_SIZE, GFP_KERNEL); 1744 ctx.new_len = min_t(size_t, PAGE_SIZE, *pcount); 1745 if (ctx.new_val) { 1746 memcpy(ctx.new_val, *buf, ctx.new_len); 1747 } else { 1748 /* Let BPF program decide how to proceed. */ 1749 ctx.new_len = 0; 1750 } 1751 } 1752 1753 rcu_read_lock(); 1754 cgrp = task_dfl_cgroup(current); 1755 ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, 0, 1756 NULL); 1757 rcu_read_unlock(); 1758 1759 kfree(ctx.cur_val); 1760 1761 if (ret == 1 && ctx.new_updated) { 1762 kfree(*buf); 1763 *buf = ctx.new_val; 1764 *pcount = ctx.new_len; 1765 } else { 1766 kfree(ctx.new_val); 1767 } 1768 1769 return ret; 1770 } 1771 1772 #ifdef CONFIG_NET 1773 static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen, 1774 struct bpf_sockopt_buf *buf) 1775 { 1776 if (unlikely(max_optlen < 0)) 1777 return -EINVAL; 1778 1779 if (unlikely(max_optlen > PAGE_SIZE)) { 1780 /* We don't expose optvals that are greater than PAGE_SIZE 1781 * to the BPF program. 
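 * Larger requests are clamped to PAGE_SIZE and the clamped length is
 * returned to the caller.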
1782 */ 1783 max_optlen = PAGE_SIZE; 1784 } 1785 1786 if (max_optlen <= sizeof(buf->data)) { 1787 /* When the optval fits into BPF_SOCKOPT_KERN_BUF_SIZE 1788 * bytes avoid the cost of kzalloc. 1789 */ 1790 ctx->optval = buf->data; 1791 ctx->optval_end = ctx->optval + max_optlen; 1792 return max_optlen; 1793 } 1794 1795 ctx->optval = kzalloc(max_optlen, GFP_USER); 1796 if (!ctx->optval) 1797 return -ENOMEM; 1798 1799 ctx->optval_end = ctx->optval + max_optlen; 1800 1801 return max_optlen; 1802 } 1803 1804 static void sockopt_free_buf(struct bpf_sockopt_kern *ctx, 1805 struct bpf_sockopt_buf *buf) 1806 { 1807 if (ctx->optval == buf->data) 1808 return; 1809 kfree(ctx->optval); 1810 } 1811 1812 static bool sockopt_buf_allocated(struct bpf_sockopt_kern *ctx, 1813 struct bpf_sockopt_buf *buf) 1814 { 1815 return ctx->optval != buf->data; 1816 } 1817 1818 int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level, 1819 int *optname, sockptr_t optval, 1820 int *optlen, char **kernel_optval) 1821 { 1822 struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); 1823 struct bpf_sockopt_buf buf = {}; 1824 struct bpf_sockopt_kern ctx = { 1825 .sk = sk, 1826 .level = *level, 1827 .optname = *optname, 1828 }; 1829 int ret, max_optlen; 1830 1831 /* Allocate a bit more than the initial user buffer for 1832 * BPF program. The canonical use case is overriding 1833 * TCP_CONGESTION(nv) to TCP_CONGESTION(cubic). 1834 */ 1835 max_optlen = max_t(int, 16, *optlen); 1836 max_optlen = sockopt_alloc_buf(&ctx, max_optlen, &buf); 1837 if (max_optlen < 0) 1838 return max_optlen; 1839 1840 ctx.optlen = *optlen; 1841 1842 if (copy_from_sockptr(ctx.optval, optval, 1843 min(*optlen, max_optlen))) { 1844 ret = -EFAULT; 1845 goto out; 1846 } 1847 1848 lock_sock(sk); 1849 ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_SETSOCKOPT, 1850 &ctx, bpf_prog_run, 0, NULL); 1851 release_sock(sk); 1852 1853 if (ret) 1854 goto out; 1855 1856 if (ctx.optlen == -1) { 1857 /* optlen set to -1, bypass kernel */ 1858 ret = 1; 1859 } else if (ctx.optlen > max_optlen || ctx.optlen < -1) { 1860 /* optlen is out of bounds */ 1861 if (*optlen > PAGE_SIZE && ctx.optlen >= 0) { 1862 pr_info_once("bpf setsockopt: ignoring program buffer with optlen=%d (max_optlen=%d)\n", 1863 ctx.optlen, max_optlen); 1864 ret = 0; 1865 goto out; 1866 } 1867 ret = -EFAULT; 1868 } else { 1869 /* optlen within bounds, run kernel handler */ 1870 ret = 0; 1871 1872 /* export any potential modifications */ 1873 *level = ctx.level; 1874 *optname = ctx.optname; 1875 1876 /* optlen == 0 from BPF indicates that we should 1877 * use original userspace data. 1878 */ 1879 if (ctx.optlen != 0) { 1880 *optlen = ctx.optlen; 1881 /* We've used bpf_sockopt_kern->buf as an intermediary 1882 * storage, but the BPF program indicates that we need 1883 * to pass this data to the kernel setsockopt handler. 1884 * No way to export on-stack buf, have to allocate a 1885 * new buffer. 
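 * (buf lives on the caller's stack, inside its struct bpf_sockopt_buf.)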
1886 */ 1887 if (!sockopt_buf_allocated(&ctx, &buf)) { 1888 void *p = kmalloc(ctx.optlen, GFP_USER); 1889 1890 if (!p) { 1891 ret = -ENOMEM; 1892 goto out; 1893 } 1894 memcpy(p, ctx.optval, ctx.optlen); 1895 *kernel_optval = p; 1896 } else { 1897 *kernel_optval = ctx.optval; 1898 } 1899 /* export and don't free sockopt buf */ 1900 return 0; 1901 } 1902 } 1903 1904 out: 1905 sockopt_free_buf(&ctx, &buf); 1906 return ret; 1907 } 1908 1909 int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, 1910 int optname, sockptr_t optval, 1911 sockptr_t optlen, int max_optlen, 1912 int retval) 1913 { 1914 struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); 1915 struct bpf_sockopt_buf buf = {}; 1916 struct bpf_sockopt_kern ctx = { 1917 .sk = sk, 1918 .level = level, 1919 .optname = optname, 1920 .current_task = current, 1921 }; 1922 int orig_optlen; 1923 int ret; 1924 1925 orig_optlen = max_optlen; 1926 ctx.optlen = max_optlen; 1927 max_optlen = sockopt_alloc_buf(&ctx, max_optlen, &buf); 1928 if (max_optlen < 0) 1929 return max_optlen; 1930 1931 if (!retval) { 1932 /* If kernel getsockopt finished successfully, 1933 * copy whatever was returned to the user back 1934 * into our temporary buffer. Set optlen to the 1935 * one that kernel returned as well to let 1936 * BPF programs inspect the value. 1937 */ 1938 if (copy_from_sockptr(&ctx.optlen, optlen, 1939 sizeof(ctx.optlen))) { 1940 ret = -EFAULT; 1941 goto out; 1942 } 1943 1944 if (ctx.optlen < 0) { 1945 ret = -EFAULT; 1946 goto out; 1947 } 1948 orig_optlen = ctx.optlen; 1949 1950 if (copy_from_sockptr(ctx.optval, optval, 1951 min(ctx.optlen, max_optlen))) { 1952 ret = -EFAULT; 1953 goto out; 1954 } 1955 } 1956 1957 lock_sock(sk); 1958 ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_GETSOCKOPT, 1959 &ctx, bpf_prog_run, retval, NULL); 1960 release_sock(sk); 1961 1962 if (ret < 0) 1963 goto out; 1964 1965 if (!sockptr_is_null(optval) && 1966 (ctx.optlen > max_optlen || ctx.optlen < 0)) { 1967 if (orig_optlen > PAGE_SIZE && ctx.optlen >= 0) { 1968 pr_info_once("bpf getsockopt: ignoring program buffer with optlen=%d (max_optlen=%d)\n", 1969 ctx.optlen, max_optlen); 1970 ret = retval; 1971 goto out; 1972 } 1973 ret = -EFAULT; 1974 goto out; 1975 } 1976 1977 if (ctx.optlen != 0) { 1978 if (!sockptr_is_null(optval) && 1979 copy_to_sockptr(optval, ctx.optval, ctx.optlen)) { 1980 ret = -EFAULT; 1981 goto out; 1982 } 1983 if (copy_to_sockptr(optlen, &ctx.optlen, sizeof(ctx.optlen))) { 1984 ret = -EFAULT; 1985 goto out; 1986 } 1987 } 1988 1989 out: 1990 sockopt_free_buf(&ctx, &buf); 1991 return ret; 1992 } 1993 1994 int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level, 1995 int optname, void *optval, 1996 int *optlen, int retval) 1997 { 1998 struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); 1999 struct bpf_sockopt_kern ctx = { 2000 .sk = sk, 2001 .level = level, 2002 .optname = optname, 2003 .optlen = *optlen, 2004 .optval = optval, 2005 .optval_end = optval + *optlen, 2006 .current_task = current, 2007 }; 2008 int ret; 2009 2010 /* Note that __cgroup_bpf_run_filter_getsockopt doesn't copy 2011 * user data back into BPF buffer when reval != 0. This is 2012 * done as an optimization to avoid extra copy, assuming 2013 * kernel won't populate the data in case of an error. 2014 * Here we always pass the data and memset() should 2015 * be called if that data shouldn't be "exported". 
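 * Here ctx.optval aliases the caller's kernel buffer, so modifications made
 * by the program are directly visible; only the (possibly shrunk) optlen is
 * copied back.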

int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
					    int optname, void *optval,
					    int *optlen, int retval)
{
	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	struct bpf_sockopt_kern ctx = {
		.sk = sk,
		.level = level,
		.optname = optname,
		.optlen = *optlen,
		.optval = optval,
		.optval_end = optval + *optlen,
		.current_task = current,
	};
	int ret;

	/* Note that __cgroup_bpf_run_filter_getsockopt doesn't copy
	 * user data back into BPF buffer when retval != 0. This is
	 * done as an optimization to avoid extra copy, assuming
	 * kernel won't populate the data in case of an error.
	 * Here we always pass the data and memset() should
	 * be called if that data shouldn't be "exported".
	 */

	ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_GETSOCKOPT,
				    &ctx, bpf_prog_run, retval, NULL);
	if (ret < 0)
		return ret;

	if (ctx.optlen > *optlen)
		return -EFAULT;

	/* BPF programs can shrink the buffer, export the modifications.
	 */
	if (ctx.optlen != 0)
		*optlen = ctx.optlen;

	return ret;
}
#endif

static ssize_t sysctl_cpy_dir(const struct ctl_dir *dir, char **bufp,
			      size_t *lenp)
{
	ssize_t tmp_ret = 0, ret;

	if (dir->header.parent) {
		tmp_ret = sysctl_cpy_dir(dir->header.parent, bufp, lenp);
		if (tmp_ret < 0)
			return tmp_ret;
	}

	ret = strscpy(*bufp, dir->header.ctl_table[0].procname, *lenp);
	if (ret < 0)
		return ret;
	*bufp += ret;
	*lenp -= ret;
	ret += tmp_ret;

	/* Avoid leading slash. */
	if (!ret)
		return ret;

	tmp_ret = strscpy(*bufp, "/", *lenp);
	if (tmp_ret < 0)
		return tmp_ret;
	*bufp += tmp_ret;
	*lenp -= tmp_ret;

	return ret + tmp_ret;
}

BPF_CALL_4(bpf_sysctl_get_name, struct bpf_sysctl_kern *, ctx, char *, buf,
	   size_t, buf_len, u64, flags)
{
	ssize_t tmp_ret = 0, ret;

	if (!buf)
		return -EINVAL;

	if (!(flags & BPF_F_SYSCTL_BASE_NAME)) {
		if (!ctx->head)
			return -EINVAL;
		tmp_ret = sysctl_cpy_dir(ctx->head->parent, &buf, &buf_len);
		if (tmp_ret < 0)
			return tmp_ret;
	}

	ret = strscpy(buf, ctx->table->procname, buf_len);

	return ret < 0 ? ret : tmp_ret + ret;
}

static const struct bpf_func_proto bpf_sysctl_get_name_proto = {
	.func		= bpf_sysctl_get_name,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_MEM,
	.arg3_type	= ARG_CONST_SIZE,
	.arg4_type	= ARG_ANYTHING,
};

static int copy_sysctl_value(char *dst, size_t dst_len, char *src,
			     size_t src_len)
{
	if (!dst)
		return -EINVAL;

	if (!dst_len)
		return -E2BIG;

	if (!src || !src_len) {
		memset(dst, 0, dst_len);
		return -EINVAL;
	}

	memcpy(dst, src, min(dst_len, src_len));

	if (dst_len > src_len) {
		memset(dst + src_len, '\0', dst_len - src_len);
		return src_len;
	}

	dst[dst_len - 1] = '\0';

	return -E2BIG;
}

BPF_CALL_3(bpf_sysctl_get_current_value, struct bpf_sysctl_kern *, ctx,
	   char *, buf, size_t, buf_len)
{
	return copy_sysctl_value(buf, buf_len, ctx->cur_val, ctx->cur_len);
}

static const struct bpf_func_proto bpf_sysctl_get_current_value_proto = {
	.func		= bpf_sysctl_get_current_value,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE,
};

BPF_CALL_3(bpf_sysctl_get_new_value, struct bpf_sysctl_kern *, ctx, char *, buf,
	   size_t, buf_len)
{
	if (!ctx->write) {
		if (buf && buf_len)
			memset(buf, '\0', buf_len);
		return -EINVAL;
	}
	return copy_sysctl_value(buf, buf_len, ctx->new_val, ctx->new_len);
}

static const struct bpf_func_proto bpf_sysctl_get_new_value_proto = {
	.func		= bpf_sysctl_get_new_value,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE,
};

BPF_CALL_3(bpf_sysctl_set_new_value, struct bpf_sysctl_kern *, ctx,
	   const char *, buf, size_t, buf_len)
{
	if (!ctx->write || !ctx->new_val || !ctx->new_len || !buf || !buf_len)
		return -EINVAL;

	if (buf_len > PAGE_SIZE - 1)
		return -E2BIG;

	memcpy(ctx->new_val, buf, buf_len);
	ctx->new_len = buf_len;
	ctx->new_updated = 1;

	return 0;
}

static const struct bpf_func_proto bpf_sysctl_set_new_value_proto = {
	.func		= bpf_sysctl_set_new_value,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg3_type	= ARG_CONST_SIZE,
};

static const struct bpf_func_proto *
sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	const struct bpf_func_proto *func_proto;

	func_proto = cgroup_common_func_proto(func_id, prog);
	if (func_proto)
		return func_proto;

	func_proto = cgroup_current_func_proto(func_id, prog);
	if (func_proto)
		return func_proto;

	switch (func_id) {
	case BPF_FUNC_sysctl_get_name:
		return &bpf_sysctl_get_name_proto;
	case BPF_FUNC_sysctl_get_current_value:
		return &bpf_sysctl_get_current_value_proto;
	case BPF_FUNC_sysctl_get_new_value:
		return &bpf_sysctl_get_new_value_proto;
	case BPF_FUNC_sysctl_set_new_value:
		return &bpf_sysctl_set_new_value_proto;
	case BPF_FUNC_ktime_get_coarse_ns:
		return &bpf_ktime_get_coarse_ns_proto;
	case BPF_FUNC_perf_event_output:
		return &bpf_event_output_data_proto;
	default:
		return bpf_base_func_proto(func_id);
	}
}

static bool sysctl_is_valid_access(int off, int size, enum bpf_access_type type,
				   const struct bpf_prog *prog,
				   struct bpf_insn_access_aux *info)
{
	const int size_default = sizeof(__u32);

	if (off < 0 || off + size > sizeof(struct bpf_sysctl) || off % size)
		return false;

	switch (off) {
	case bpf_ctx_range(struct bpf_sysctl, write):
		if (type != BPF_READ)
			return false;
		bpf_ctx_record_field_size(info, size_default);
		return bpf_ctx_narrow_access_ok(off, size, size_default);
	case bpf_ctx_range(struct bpf_sysctl, file_pos):
		if (type == BPF_READ) {
			bpf_ctx_record_field_size(info, size_default);
			return bpf_ctx_narrow_access_ok(off, size, size_default);
		} else {
			return size == size_default;
		}
	default:
		return false;
	}
}
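
/* Example (illustrative only): a minimal BPF_CGROUP_SYSCTL program using the
 * bpf_sysctl_get_name() helper wired up in sysctl_func_proto() above.  The
 * policy itself (making every sysctl read-only for tasks in the cgroup) is
 * arbitrary; the section/program names and the 64-byte name buffer are
 * likewise just illustrative choices.
 *
 *	SEC("cgroup/sysctl")
 *	int sysctl_guard(struct bpf_sysctl *ctx)
 *	{
 *		char name[64];
 *
 *		if (bpf_sysctl_get_name(ctx, name, sizeof(name), 0) < 0)
 *			return 0;	// name didn't fit: deny access
 *
 *		if (ctx->write) {
 *			bpf_printk("denied sysctl write to %s", name);
 *			return 0;	// writer sees -EPERM
 *		}
 *		return 1;		// allow reads
 *	}
 *
 * A write policy could instead inspect the proposed value with
 * bpf_sysctl_get_new_value() and rewrite it with bpf_sysctl_set_new_value(),
 * which updates ctx->new_val/new_len and sets new_updated as above.
 */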

static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
				     const struct bpf_insn *si,
				     struct bpf_insn *insn_buf,
				     struct bpf_prog *prog, u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;
	u32 read_size;

	switch (si->off) {
	case offsetof(struct bpf_sysctl, write):
		*insn++ = BPF_LDX_MEM(
			BPF_SIZE(si->code), si->dst_reg, si->src_reg,
			bpf_target_off(struct bpf_sysctl_kern, write,
				       sizeof_field(struct bpf_sysctl_kern,
						    write),
				       target_size));
		break;
	case offsetof(struct bpf_sysctl, file_pos):
		/* ppos is a pointer so it should be accessed via indirect
		 * loads and stores. Also for stores additional temporary
		 * register is used since neither src_reg nor dst_reg can be
		 * overridden.
		 */
		if (type == BPF_WRITE) {
			int treg = BPF_REG_9;

			if (si->src_reg == treg || si->dst_reg == treg)
				--treg;
			if (si->src_reg == treg || si->dst_reg == treg)
				--treg;
			*insn++ = BPF_STX_MEM(
				BPF_DW, si->dst_reg, treg,
				offsetof(struct bpf_sysctl_kern, tmp_reg));
			*insn++ = BPF_LDX_MEM(
				BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos),
				treg, si->dst_reg,
				offsetof(struct bpf_sysctl_kern, ppos));
			*insn++ = BPF_RAW_INSN(
				BPF_CLASS(si->code) | BPF_MEM | BPF_SIZEOF(u32),
				treg, si->src_reg,
				bpf_ctx_narrow_access_offset(
					0, sizeof(u32), sizeof(loff_t)),
				si->imm);
			*insn++ = BPF_LDX_MEM(
				BPF_DW, treg, si->dst_reg,
				offsetof(struct bpf_sysctl_kern, tmp_reg));
		} else {
			*insn++ = BPF_LDX_MEM(
				BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos),
				si->dst_reg, si->src_reg,
				offsetof(struct bpf_sysctl_kern, ppos));
			read_size = bpf_size_to_bytes(BPF_SIZE(si->code));
			*insn++ = BPF_LDX_MEM(
				BPF_SIZE(si->code), si->dst_reg, si->dst_reg,
				bpf_ctx_narrow_access_offset(
					0, read_size, sizeof(loff_t)));
		}
		*target_size = sizeof(u32);
		break;
	}

	return insn - insn_buf;
}

const struct bpf_verifier_ops cg_sysctl_verifier_ops = {
	.get_func_proto		= sysctl_func_proto,
	.is_valid_access	= sysctl_is_valid_access,
	.convert_ctx_access	= sysctl_convert_ctx_access,
};

const struct bpf_prog_ops cg_sysctl_prog_ops = {
};

#ifdef CONFIG_NET
BPF_CALL_1(bpf_get_netns_cookie_sockopt, struct bpf_sockopt_kern *, ctx)
{
	const struct net *net = ctx ? sock_net(ctx->sk) : &init_net;

	return net->net_cookie;
}

static const struct bpf_func_proto bpf_get_netns_cookie_sockopt_proto = {
	.func		= bpf_get_netns_cookie_sockopt,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX_OR_NULL,
};
#endif

static const struct bpf_func_proto *
cg_sockopt_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	const struct bpf_func_proto *func_proto;

	func_proto = cgroup_common_func_proto(func_id, prog);
	if (func_proto)
		return func_proto;

	func_proto = cgroup_current_func_proto(func_id, prog);
	if (func_proto)
		return func_proto;

	switch (func_id) {
#ifdef CONFIG_NET
	case BPF_FUNC_get_netns_cookie:
		return &bpf_get_netns_cookie_sockopt_proto;
	case BPF_FUNC_sk_storage_get:
		return &bpf_sk_storage_get_proto;
	case BPF_FUNC_sk_storage_delete:
		return &bpf_sk_storage_delete_proto;
	case BPF_FUNC_setsockopt:
		if (prog->expected_attach_type == BPF_CGROUP_SETSOCKOPT)
			return &bpf_sk_setsockopt_proto;
		return NULL;
	case BPF_FUNC_getsockopt:
		if (prog->expected_attach_type == BPF_CGROUP_SETSOCKOPT)
			return &bpf_sk_getsockopt_proto;
		return NULL;
#endif
#ifdef CONFIG_INET
	case BPF_FUNC_tcp_sock:
		return &bpf_tcp_sock_proto;
#endif
	case BPF_FUNC_perf_event_output:
		return &bpf_event_output_data_proto;
	default:
		return bpf_base_func_proto(func_id);
	}
}
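
/* Example (illustrative only): because cg_sockopt_func_proto() above exposes
 * bpf_sk_storage_get()/bpf_sk_storage_delete(), a sockopt program can keep
 * per-socket state across invocations.  The map and struct names are
 * arbitrary; this sketch merely records that a socket was seen by the
 * setsockopt hook.
 *
 *	struct sockopt_state {
 *		__u8 seen;
 *	};
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_SK_STORAGE);
 *		__uint(map_flags, BPF_F_NO_PREALLOC);
 *		__type(key, int);
 *		__type(value, struct sockopt_state);
 *	} socket_storage SEC(".maps");
 *
 *	SEC("cgroup/setsockopt")
 *	int track_setsockopt(struct bpf_sockopt *ctx)
 *	{
 *		struct sockopt_state *st;
 *
 *		st = bpf_sk_storage_get(&socket_storage, ctx->sk, 0,
 *					BPF_SK_STORAGE_GET_F_CREATE);
 *		if (!st)
 *			return 0;	// allocation failed: -EPERM
 *
 *		st->seen = 1;
 *		return 1;		// always run the kernel handler
 *	}
 */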

static bool cg_sockopt_is_valid_access(int off, int size,
				       enum bpf_access_type type,
				       const struct bpf_prog *prog,
				       struct bpf_insn_access_aux *info)
{
	const int size_default = sizeof(__u32);

	if (off < 0 || off >= sizeof(struct bpf_sockopt))
		return false;

	if (off % size != 0)
		return false;

	if (type == BPF_WRITE) {
		switch (off) {
		case offsetof(struct bpf_sockopt, retval):
			if (size != size_default)
				return false;
			return prog->expected_attach_type ==
				BPF_CGROUP_GETSOCKOPT;
		case offsetof(struct bpf_sockopt, optname):
			fallthrough;
		case offsetof(struct bpf_sockopt, level):
			if (size != size_default)
				return false;
			return prog->expected_attach_type ==
				BPF_CGROUP_SETSOCKOPT;
		case offsetof(struct bpf_sockopt, optlen):
			return size == size_default;
		default:
			return false;
		}
	}

	switch (off) {
	case offsetof(struct bpf_sockopt, sk):
		if (size != sizeof(__u64))
			return false;
		info->reg_type = PTR_TO_SOCKET;
		break;
	case offsetof(struct bpf_sockopt, optval):
		if (size != sizeof(__u64))
			return false;
		info->reg_type = PTR_TO_PACKET;
		break;
	case offsetof(struct bpf_sockopt, optval_end):
		if (size != sizeof(__u64))
			return false;
		info->reg_type = PTR_TO_PACKET_END;
		break;
	case offsetof(struct bpf_sockopt, retval):
		if (size != size_default)
			return false;
		return prog->expected_attach_type == BPF_CGROUP_GETSOCKOPT;
	default:
		if (size != size_default)
			return false;
		break;
	}
	return true;
}

#define CG_SOCKOPT_READ_FIELD(F)					\
	BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, F),	\
		    si->dst_reg, si->src_reg,				\
		    offsetof(struct bpf_sockopt_kern, F))

#define CG_SOCKOPT_WRITE_FIELD(F)					\
	BPF_RAW_INSN((BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, F) |	\
		      BPF_MEM | BPF_CLASS(si->code)),			\
		     si->dst_reg, si->src_reg,				\
		     offsetof(struct bpf_sockopt_kern, F),		\
		     si->imm)

static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type,
					 const struct bpf_insn *si,
					 struct bpf_insn *insn_buf,
					 struct bpf_prog *prog,
					 u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;

	switch (si->off) {
	case offsetof(struct bpf_sockopt, sk):
		*insn++ = CG_SOCKOPT_READ_FIELD(sk);
		break;
	case offsetof(struct bpf_sockopt, level):
		if (type == BPF_WRITE)
			*insn++ = CG_SOCKOPT_WRITE_FIELD(level);
		else
			*insn++ = CG_SOCKOPT_READ_FIELD(level);
		break;
	case offsetof(struct bpf_sockopt, optname):
		if (type == BPF_WRITE)
			*insn++ = CG_SOCKOPT_WRITE_FIELD(optname);
		else
			*insn++ = CG_SOCKOPT_READ_FIELD(optname);
		break;
	case offsetof(struct bpf_sockopt, optlen):
		if (type == BPF_WRITE)
			*insn++ = CG_SOCKOPT_WRITE_FIELD(optlen);
		else
			*insn++ = CG_SOCKOPT_READ_FIELD(optlen);
		break;
	case offsetof(struct bpf_sockopt, retval):
		BUILD_BUG_ON(offsetof(struct bpf_cg_run_ctx, run_ctx) != 0);

		if (type == BPF_WRITE) {
			int treg = BPF_REG_9;

			if (si->src_reg == treg || si->dst_reg == treg)
				--treg;
			if (si->src_reg == treg || si->dst_reg == treg)
				--treg;
			*insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, treg,
					      offsetof(struct bpf_sockopt_kern, tmp_reg));
			*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
					      treg, si->dst_reg,
					      offsetof(struct bpf_sockopt_kern, current_task));
			*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
					      treg, treg,
					      offsetof(struct task_struct, bpf_ctx));
			*insn++ = BPF_RAW_INSN(BPF_CLASS(si->code) | BPF_MEM |
					       BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
					       treg, si->src_reg,
					       offsetof(struct bpf_cg_run_ctx, retval),
					       si->imm);
			*insn++ = BPF_LDX_MEM(BPF_DW, treg, si->dst_reg,
					      offsetof(struct bpf_sockopt_kern, tmp_reg));
		} else {
			*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
					      si->dst_reg, si->src_reg,
					      offsetof(struct bpf_sockopt_kern, current_task));
			*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
					      si->dst_reg, si->dst_reg,
					      offsetof(struct task_struct, bpf_ctx));
			*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
					      si->dst_reg, si->dst_reg,
					      offsetof(struct bpf_cg_run_ctx, retval));
		}
		break;
	case offsetof(struct bpf_sockopt, optval):
		*insn++ = CG_SOCKOPT_READ_FIELD(optval);
		break;
	case offsetof(struct bpf_sockopt, optval_end):
		*insn++ = CG_SOCKOPT_READ_FIELD(optval_end);
		break;
	}

	return insn - insn_buf;
}

static int cg_sockopt_get_prologue(struct bpf_insn *insn_buf,
				   bool direct_write,
				   const struct bpf_prog *prog)
{
	/* Nothing to do for sockopt argument. The data is kzalloc'ated.
	 */
	return 0;
}

const struct bpf_verifier_ops cg_sockopt_verifier_ops = {
	.get_func_proto		= cg_sockopt_func_proto,
	.is_valid_access	= cg_sockopt_is_valid_access,
	.convert_ctx_access	= cg_sockopt_convert_ctx_access,
	.gen_prologue		= cg_sockopt_get_prologue,
};

const struct bpf_prog_ops cg_sockopt_prog_ops = {
};

/* Common helpers for cgroup hooks. */
const struct bpf_func_proto *
cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_get_local_storage:
		return &bpf_get_local_storage_proto;
	case BPF_FUNC_get_retval:
		switch (prog->expected_attach_type) {
		case BPF_CGROUP_INET_INGRESS:
		case BPF_CGROUP_INET_EGRESS:
		case BPF_CGROUP_SOCK_OPS:
		case BPF_CGROUP_UDP4_RECVMSG:
		case BPF_CGROUP_UDP6_RECVMSG:
		case BPF_CGROUP_INET4_GETPEERNAME:
		case BPF_CGROUP_INET6_GETPEERNAME:
		case BPF_CGROUP_INET4_GETSOCKNAME:
		case BPF_CGROUP_INET6_GETSOCKNAME:
			return NULL;
		default:
			return &bpf_get_retval_proto;
		}
	case BPF_FUNC_set_retval:
		switch (prog->expected_attach_type) {
		case BPF_CGROUP_INET_INGRESS:
		case BPF_CGROUP_INET_EGRESS:
		case BPF_CGROUP_SOCK_OPS:
		case BPF_CGROUP_UDP4_RECVMSG:
		case BPF_CGROUP_UDP6_RECVMSG:
		case BPF_CGROUP_INET4_GETPEERNAME:
		case BPF_CGROUP_INET6_GETPEERNAME:
		case BPF_CGROUP_INET4_GETSOCKNAME:
		case BPF_CGROUP_INET6_GETSOCKNAME:
			return NULL;
		default:
			return &bpf_set_retval_proto;
		}
	default:
		return NULL;
	}
}

/* Common helpers for cgroup hooks with valid process context. */
const struct bpf_func_proto *
cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_get_current_uid_gid:
		return &bpf_get_current_uid_gid_proto;
	case BPF_FUNC_get_current_pid_tgid:
		return &bpf_get_current_pid_tgid_proto;
	case BPF_FUNC_get_current_comm:
		return &bpf_get_current_comm_proto;
#ifdef CONFIG_CGROUP_NET_CLASSID
	case BPF_FUNC_get_cgroup_classid:
		return &bpf_get_cgroup_classid_curr_proto;
#endif
	default:
		return NULL;
	}
}
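
/* Example (illustrative only): attaching one of the sketched programs from
 * userspace with libbpf.  The object file name, program name ("rewrite_cc"
 * from the setsockopt sketch) and cgroup path are placeholders.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <bpf/libbpf.h>
 *
 *	int main(void)
 *	{
 *		struct bpf_object *obj;
 *		struct bpf_program *prog;
 *		struct bpf_link *link;
 *		int cgfd;
 *
 *		obj = bpf_object__open_file("sockopt_prog.bpf.o", NULL);
 *		if (!obj || bpf_object__load(obj))
 *			return 1;
 *
 *		prog = bpf_object__find_program_by_name(obj, "rewrite_cc");
 *		cgfd = open("/sys/fs/cgroup/test", O_RDONLY);
 *		if (!prog || cgfd < 0)
 *			return 1;
 *
 *		// The expected attach type is derived from the SEC() name,
 *		// e.g. "cgroup/setsockopt" -> BPF_CGROUP_SETSOCKOPT.
 *		link = bpf_program__attach_cgroup(prog, cgfd);
 *		if (!link)
 *			return 1;
 *
 *		// ... run the workload inside /sys/fs/cgroup/test ...
 *
 *		bpf_link__destroy(link);
 *		close(cgfd);
 *		bpf_object__close(obj);
 *		return 0;
 *	}
 */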