/*
 * Functions to manage eBPF programs attached to cgroups
 *
 * Copyright (c) 2016 Daniel Mack
 *
 * This file is subject to the terms and conditions of version 2 of the GNU
 * General Public License. See the file COPYING in the main directory of the
 * Linux distribution for more details.
 */

#include <linux/kernel.h>
#include <linux/atomic.h>
#include <linux/cgroup.h>
#include <linux/filter.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/bpf.h>
#include <linux/bpf-cgroup.h>
#include <net/sock.h>

DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
EXPORT_SYMBOL(cgroup_bpf_enabled_key);

/**
 * cgroup_bpf_put() - put references of all bpf programs
 * @cgrp: the cgroup to modify
 */
void cgroup_bpf_put(struct cgroup *cgrp)
{
	enum bpf_cgroup_storage_type stype;
	unsigned int type;

	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
		struct list_head *progs = &cgrp->bpf.progs[type];
		struct bpf_prog_list *pl, *tmp;

		list_for_each_entry_safe(pl, tmp, progs, node) {
			list_del(&pl->node);
			bpf_prog_put(pl->prog);
			for_each_cgroup_storage_type(stype) {
				bpf_cgroup_storage_unlink(pl->storage[stype]);
				bpf_cgroup_storage_free(pl->storage[stype]);
			}
			kfree(pl);
			static_branch_dec(&cgroup_bpf_enabled_key);
		}
		bpf_prog_array_free(cgrp->bpf.effective[type]);
	}
}

/* count number of elements in the list.
 * it's slow but the list cannot be long
 */
static u32 prog_list_length(struct list_head *head)
{
	struct bpf_prog_list *pl;
	u32 cnt = 0;

	list_for_each_entry(pl, head, node) {
		if (!pl->prog)
			continue;
		cnt++;
	}
	return cnt;
}

/* if parent has non-overridable prog attached,
 * disallow attaching new programs to the descendant cgroup.
 * if parent has overridable or multi-prog, allow attaching
 */
static bool hierarchy_allows_attach(struct cgroup *cgrp,
				    enum bpf_attach_type type,
				    u32 new_flags)
{
	struct cgroup *p;

	p = cgroup_parent(cgrp);
	if (!p)
		return true;
	do {
		u32 flags = p->bpf.flags[type];
		u32 cnt;

		if (flags & BPF_F_ALLOW_MULTI)
			return true;
		cnt = prog_list_length(&p->bpf.progs[type]);
		WARN_ON_ONCE(cnt > 1);
		if (cnt == 1)
			return !!(flags & BPF_F_ALLOW_OVERRIDE);
		p = cgroup_parent(p);
	} while (p);
	return true;
}
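
/* Illustration of the rules above (example only, not normative; A is a
 * parent cgroup, B its child):
 *
 *	A (prog, no flags):		attaching to B fails with -EPERM
 *	A (prog, BPF_F_ALLOW_OVERRIDE):	attaching to B with ALLOW_OVERRIDE
 *					succeeds; B's prog overrides A's
 *	A (prog, BPF_F_ALLOW_MULTI):	attaching to B with ALLOW_MULTI
 *					succeeds; both programs run, B's
 *					first (see compute_effective_progs()
 *					below)
 */
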
/* compute a chain of effective programs for a given cgroup:
 * start from the list of programs in this cgroup and add
 * all parent programs.
 * Note that parent's F_ALLOW_OVERRIDE-type program yields
 * to programs in this cgroup
 */
static int compute_effective_progs(struct cgroup *cgrp,
				   enum bpf_attach_type type,
				   struct bpf_prog_array __rcu **array)
{
	enum bpf_cgroup_storage_type stype;
	struct bpf_prog_array *progs;
	struct bpf_prog_list *pl;
	struct cgroup *p = cgrp;
	int cnt = 0;

	/* count number of effective programs by walking parents */
	do {
		if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
			cnt += prog_list_length(&p->bpf.progs[type]);
		p = cgroup_parent(p);
	} while (p);

	progs = bpf_prog_array_alloc(cnt, GFP_KERNEL);
	if (!progs)
		return -ENOMEM;

	/* populate the array with effective progs */
	cnt = 0;
	p = cgrp;
	do {
		if (cnt > 0 && !(p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
			continue;

		list_for_each_entry(pl, &p->bpf.progs[type], node) {
			if (!pl->prog)
				continue;

			progs->items[cnt].prog = pl->prog;
			for_each_cgroup_storage_type(stype)
				progs->items[cnt].cgroup_storage[stype] =
					pl->storage[stype];
			cnt++;
		}
	} while ((p = cgroup_parent(p)));

	rcu_assign_pointer(*array, progs);
	return 0;
}

static void activate_effective_progs(struct cgroup *cgrp,
				     enum bpf_attach_type type,
				     struct bpf_prog_array __rcu *array)
{
	struct bpf_prog_array __rcu *old_array;

	old_array = xchg(&cgrp->bpf.effective[type], array);
	/* free prog array after grace period, since __cgroup_bpf_run_*()
	 * might be still walking the array
	 */
	bpf_prog_array_free(old_array);
}

/**
 * cgroup_bpf_inherit() - inherit effective programs from parent
 * @cgrp: the cgroup to modify
 */
int cgroup_bpf_inherit(struct cgroup *cgrp)
{
	/* has to use a macro instead of a const int, since the compiler
	 * thinks that the array below is variable length
	 */
#define NR ARRAY_SIZE(cgrp->bpf.effective)
	struct bpf_prog_array __rcu *arrays[NR] = {};
	int i;

	for (i = 0; i < NR; i++)
		INIT_LIST_HEAD(&cgrp->bpf.progs[i]);

	for (i = 0; i < NR; i++)
		if (compute_effective_progs(cgrp, i, &arrays[i]))
			goto cleanup;

	for (i = 0; i < NR; i++)
		activate_effective_progs(cgrp, i, arrays[i]);

	return 0;
cleanup:
	for (i = 0; i < NR; i++)
		bpf_prog_array_free(arrays[i]);
	return -ENOMEM;
}
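
/* Recompute and swap in the effective prog arrays for @cgrp and every
 * descendant. New arrays are first built into each descendant's
 * bpf.inactive slot; only once all allocations have succeeded are they
 * activated, so a failure leaves every effective array untouched.
 */
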
static int update_effective_progs(struct cgroup *cgrp,
				  enum bpf_attach_type type)
{
	struct cgroup_subsys_state *css;
	int err;

	/* allocate and recompute effective prog arrays */
	css_for_each_descendant_pre(css, &cgrp->self) {
		struct cgroup *desc = container_of(css, struct cgroup, self);

		err = compute_effective_progs(desc, type, &desc->bpf.inactive);
		if (err)
			goto cleanup;
	}

	/* all allocations were successful. Activate all prog arrays */
	css_for_each_descendant_pre(css, &cgrp->self) {
		struct cgroup *desc = container_of(css, struct cgroup, self);

		activate_effective_progs(desc, type, desc->bpf.inactive);
		desc->bpf.inactive = NULL;
	}

	return 0;

cleanup:
	/* oom while computing effective. Free all computed effective arrays
	 * since they were not activated
	 */
	css_for_each_descendant_pre(css, &cgrp->self) {
		struct cgroup *desc = container_of(css, struct cgroup, self);

		bpf_prog_array_free(desc->bpf.inactive);
		desc->bpf.inactive = NULL;
	}

	return err;
}

#define BPF_CGROUP_MAX_PROGS 64
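
/* Userspace reaches the attach path below via the bpf(2) syscall; an
 * illustrative sketch (error handling omitted, cgroup_fd/prog_fd are
 * caller-supplied file descriptors):
 *
 *	union bpf_attr attr = {};
 *
 *	attr.target_fd	   = cgroup_fd;	// fd of an open cgroup directory
 *	attr.attach_bpf_fd = prog_fd;	// fd from BPF_PROG_LOAD
 *	attr.attach_type   = BPF_CGROUP_INET_INGRESS;
 *	attr.attach_flags  = BPF_F_ALLOW_MULTI;
 *	syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr));
 */
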
/**
 * __cgroup_bpf_attach() - Attach the program to a cgroup, and
 *                         propagate the change to descendants
 * @cgrp: The cgroup whose descendants to traverse
 * @prog: A program to attach
 * @type: Type of attach operation
 * @flags: Option flags
 *
 * Must be called with cgroup_mutex held.
 */
int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
			enum bpf_attach_type type, u32 flags)
{
	struct list_head *progs = &cgrp->bpf.progs[type];
	struct bpf_prog *old_prog = NULL;
	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {NULL},
		*old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {NULL};
	enum bpf_cgroup_storage_type stype;
	struct bpf_prog_list *pl;
	bool pl_was_allocated;
	int err;

	if ((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI))
		/* invalid combination */
		return -EINVAL;

	if (!hierarchy_allows_attach(cgrp, type, flags))
		return -EPERM;

	if (!list_empty(progs) && cgrp->bpf.flags[type] != flags)
		/* Disallow attaching non-overridable on top
		 * of existing overridable in this cgroup.
		 * Disallow attaching multi-prog if overridable or none
		 */
		return -EPERM;

	if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
		return -E2BIG;

	for_each_cgroup_storage_type(stype) {
		storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
		if (IS_ERR(storage[stype])) {
			storage[stype] = NULL;
			for_each_cgroup_storage_type(stype)
				bpf_cgroup_storage_free(storage[stype]);
			return -ENOMEM;
		}
	}

	if (flags & BPF_F_ALLOW_MULTI) {
		list_for_each_entry(pl, progs, node) {
			if (pl->prog == prog) {
				/* disallow attaching the same prog twice */
				for_each_cgroup_storage_type(stype)
					bpf_cgroup_storage_free(storage[stype]);
				return -EINVAL;
			}
		}

		pl = kmalloc(sizeof(*pl), GFP_KERNEL);
		if (!pl) {
			for_each_cgroup_storage_type(stype)
				bpf_cgroup_storage_free(storage[stype]);
			return -ENOMEM;
		}

		pl_was_allocated = true;
		pl->prog = prog;
		for_each_cgroup_storage_type(stype)
			pl->storage[stype] = storage[stype];
		list_add_tail(&pl->node, progs);
	} else {
		if (list_empty(progs)) {
			pl = kmalloc(sizeof(*pl), GFP_KERNEL);
			if (!pl) {
				for_each_cgroup_storage_type(stype)
					bpf_cgroup_storage_free(storage[stype]);
				return -ENOMEM;
			}
			pl_was_allocated = true;
			list_add_tail(&pl->node, progs);
		} else {
			pl = list_first_entry(progs, typeof(*pl), node);
			old_prog = pl->prog;
			for_each_cgroup_storage_type(stype) {
				old_storage[stype] = pl->storage[stype];
				bpf_cgroup_storage_unlink(old_storage[stype]);
			}
			pl_was_allocated = false;
		}
		pl->prog = prog;
		for_each_cgroup_storage_type(stype)
			pl->storage[stype] = storage[stype];
	}

	cgrp->bpf.flags[type] = flags;

	err = update_effective_progs(cgrp, type);
	if (err)
		goto cleanup;

	static_branch_inc(&cgroup_bpf_enabled_key);
	for_each_cgroup_storage_type(stype) {
		if (!old_storage[stype])
			continue;
		bpf_cgroup_storage_free(old_storage[stype]);
	}
	if (old_prog) {
		bpf_prog_put(old_prog);
		static_branch_dec(&cgroup_bpf_enabled_key);
	}
	for_each_cgroup_storage_type(stype)
		bpf_cgroup_storage_link(storage[stype], cgrp, type);
	return 0;

cleanup:
	/* and cleanup the prog list */
	pl->prog = old_prog;
	for_each_cgroup_storage_type(stype) {
		bpf_cgroup_storage_free(pl->storage[stype]);
		pl->storage[stype] = old_storage[stype];
		bpf_cgroup_storage_link(old_storage[stype], cgrp, type);
	}
	if (pl_was_allocated) {
		list_del(&pl->node);
		kfree(pl);
	}
	return err;
}

/**
 * __cgroup_bpf_detach() - Detach the program from a cgroup, and
 *                         propagate the change to descendants
 * @cgrp: The cgroup whose descendants to traverse
 * @prog: A program to detach or NULL
 * @type: Type of detach operation
 *
 * Must be called with cgroup_mutex held.
 */
int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
			enum bpf_attach_type type)
{
	struct list_head *progs = &cgrp->bpf.progs[type];
	enum bpf_cgroup_storage_type stype;
	u32 flags = cgrp->bpf.flags[type];
	struct bpf_prog *old_prog = NULL;
	struct bpf_prog_list *pl;
	int err;

	if (flags & BPF_F_ALLOW_MULTI) {
		if (!prog)
			/* to detach MULTI prog the user has to specify valid FD
			 * of the program to be detached
			 */
			return -EINVAL;
	} else {
		if (list_empty(progs))
			/* report error when trying to detach and nothing is attached */
			return -ENOENT;
	}

	if (flags & BPF_F_ALLOW_MULTI) {
		/* find the prog and detach it */
		list_for_each_entry(pl, progs, node) {
			if (pl->prog != prog)
				continue;
			old_prog = prog;
			/* mark it deleted, so it's ignored while
			 * recomputing effective
			 */
			pl->prog = NULL;
			break;
		}
		if (!old_prog)
			return -ENOENT;
	} else {
		/* to maintain backward compatibility NONE and OVERRIDE cgroups
		 * allow detaching with invalid FD (prog==NULL)
		 */
		pl = list_first_entry(progs, typeof(*pl), node);
		old_prog = pl->prog;
		pl->prog = NULL;
	}

	err = update_effective_progs(cgrp, type);
	if (err)
		goto cleanup;

	/* now can actually delete it from this cgroup list */
	list_del(&pl->node);
	for_each_cgroup_storage_type(stype) {
		bpf_cgroup_storage_unlink(pl->storage[stype]);
		bpf_cgroup_storage_free(pl->storage[stype]);
	}
	kfree(pl);
	if (list_empty(progs))
		/* last program was detached, reset flags to zero */
		cgrp->bpf.flags[type] = 0;

	bpf_prog_put(old_prog);
	static_branch_dec(&cgroup_bpf_enabled_key);
	return 0;

cleanup:
	/* and restore back old_prog */
	pl->prog = old_prog;
	return err;
}
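
/* BPF_PROG_QUERY is typically a two-step dance from userspace
 * (illustrative): a first call with attr->query.prog_ids == NULL (or
 * prog_cnt == 0) returns only the count and flags; the caller then
 * allocates a prog_ids buffer of that size and queries again. An attach
 * racing in between still surfaces as -ENOSPC.
 */
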
/* Must be called with cgroup_mutex held to avoid races. */
int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
		       union bpf_attr __user *uattr)
{
	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
	enum bpf_attach_type type = attr->query.attach_type;
	struct list_head *progs = &cgrp->bpf.progs[type];
	u32 flags = cgrp->bpf.flags[type];
	int cnt, ret = 0, i;

	if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE)
		cnt = bpf_prog_array_length(cgrp->bpf.effective[type]);
	else
		cnt = prog_list_length(progs);

	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
		return -EFAULT;
	if (copy_to_user(&uattr->query.prog_cnt, &cnt, sizeof(cnt)))
		return -EFAULT;
	if (attr->query.prog_cnt == 0 || !prog_ids || !cnt)
		/* return early if user requested only program count + flags */
		return 0;
	if (attr->query.prog_cnt < cnt) {
		cnt = attr->query.prog_cnt;
		ret = -ENOSPC;
	}

	if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
		return bpf_prog_array_copy_to_user(cgrp->bpf.effective[type],
						   prog_ids, cnt);
	} else {
		struct bpf_prog_list *pl;
		u32 id;

		i = 0;
		list_for_each_entry(pl, progs, node) {
			id = pl->prog->aux->id;
			if (copy_to_user(prog_ids + i, &id, sizeof(id)))
				return -EFAULT;
			if (++i == cnt)
				break;
		}
	}
	return ret;
}

int cgroup_bpf_prog_attach(const union bpf_attr *attr,
			   enum bpf_prog_type ptype, struct bpf_prog *prog)
{
	struct cgroup *cgrp;
	int ret;

	cgrp = cgroup_get_from_fd(attr->target_fd);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);

	ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
				attr->attach_flags);
	cgroup_put(cgrp);
	return ret;
}

int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
{
	struct bpf_prog *prog;
	struct cgroup *cgrp;
	int ret;

	cgrp = cgroup_get_from_fd(attr->target_fd);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);

	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	if (IS_ERR(prog))
		prog = NULL;

	ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);
	if (prog)
		bpf_prog_put(prog);

	cgroup_put(cgrp);
	return ret;
}

int cgroup_bpf_prog_query(const union bpf_attr *attr,
			  union bpf_attr __user *uattr)
{
	struct cgroup *cgrp;
	int ret;

	cgrp = cgroup_get_from_fd(attr->query.target_fd);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);

	ret = cgroup_bpf_query(cgrp, attr, uattr);

	cgroup_put(cgrp);
	return ret;
}
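
/* The skb filter below is not called directly by the stack; it is
 * wrapped by the BPF_CGROUP_RUN_PROG_INET_INGRESS/EGRESS() macros in
 * <linux/bpf-cgroup.h>, which the IPv4/IPv6 receive and output paths
 * invoke and which skip all work while cgroup_bpf_enabled_key is off.
 */
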
552 */ 553 int __cgroup_bpf_run_filter_skb(struct sock *sk, 554 struct sk_buff *skb, 555 enum bpf_attach_type type) 556 { 557 unsigned int offset = skb->data - skb_network_header(skb); 558 struct sock *save_sk; 559 void *saved_data_end; 560 struct cgroup *cgrp; 561 int ret; 562 563 if (!sk || !sk_fullsock(sk)) 564 return 0; 565 566 if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6) 567 return 0; 568 569 cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); 570 save_sk = skb->sk; 571 skb->sk = sk; 572 __skb_push(skb, offset); 573 574 /* compute pointers for the bpf prog */ 575 bpf_compute_and_save_data_end(skb, &saved_data_end); 576 577 ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb, 578 __bpf_prog_run_save_cb); 579 bpf_restore_data_end(skb, saved_data_end); 580 __skb_pull(skb, offset); 581 skb->sk = save_sk; 582 return ret == 1 ? 0 : -EPERM; 583 } 584 EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb); 585 586 /** 587 * __cgroup_bpf_run_filter_sk() - Run a program on a sock 588 * @sk: sock structure to manipulate 589 * @type: The type of program to be exectuted 590 * 591 * socket is passed is expected to be of type INET or INET6. 592 * 593 * The program type passed in via @type must be suitable for sock 594 * filtering. No further check is performed to assert that. 595 * 596 * This function will return %-EPERM if any if an attached program was found 597 * and if it returned != 1 during execution. In all other cases, 0 is returned. 598 */ 599 int __cgroup_bpf_run_filter_sk(struct sock *sk, 600 enum bpf_attach_type type) 601 { 602 struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); 603 int ret; 604 605 ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sk, BPF_PROG_RUN); 606 return ret == 1 ? 0 : -EPERM; 607 } 608 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); 609 610 /** 611 * __cgroup_bpf_run_filter_sock_addr() - Run a program on a sock and 612 * provided by user sockaddr 613 * @sk: sock struct that will use sockaddr 614 * @uaddr: sockaddr struct provided by user 615 * @type: The type of program to be exectuted 616 * @t_ctx: Pointer to attach type specific context 617 * 618 * socket is expected to be of type INET or INET6. 619 * 620 * This function will return %-EPERM if an attached program is found and 621 * returned value != 1 during execution. In all other cases, 0 is returned. 622 */ 623 int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, 624 struct sockaddr *uaddr, 625 enum bpf_attach_type type, 626 void *t_ctx) 627 { 628 struct bpf_sock_addr_kern ctx = { 629 .sk = sk, 630 .uaddr = uaddr, 631 .t_ctx = t_ctx, 632 }; 633 struct sockaddr_storage unspec; 634 struct cgroup *cgrp; 635 int ret; 636 637 /* Check socket family since not all sockets represent network 638 * endpoint (e.g. AF_UNIX). 639 */ 640 if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6) 641 return 0; 642 643 if (!ctx.uaddr) { 644 memset(&unspec, 0, sizeof(unspec)); 645 ctx.uaddr = (struct sockaddr *)&unspec; 646 } 647 648 cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); 649 ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN); 650 651 return ret == 1 ? 0 : -EPERM; 652 } 653 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr); 654 655 /** 656 * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock 657 * @sk: socket to get cgroup from 658 * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains 659 * sk with connection information (IP addresses, etc.) May not contain 660 * cgroup info if it is a req sock. 
/**
 * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock
 * @sk: socket to get cgroup from
 * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains
 * sk with connection information (IP addresses, etc.) May not contain
 * cgroup info if it is a req sock.
 * @type: The type of program to be executed
 *
 * The socket passed is expected to be of type INET or INET6.
 *
 * The program type passed in via @type must be suitable for sock_ops
 * filtering. No further check is performed to assert that.
 *
 * This function will return %-EPERM if an attached program was found
 * and if it returned != 1 during execution. In all other cases, 0 is returned.
 */
int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
				     struct bpf_sock_ops_kern *sock_ops,
				     enum bpf_attach_type type)
{
	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	int ret;

	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sock_ops,
				 BPF_PROG_RUN);
	return ret == 1 ? 0 : -EPERM;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);

int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
				      short access, enum bpf_attach_type type)
{
	struct cgroup *cgrp;
	struct bpf_cgroup_dev_ctx ctx = {
		.access_type = (access << 16) | dev_type,
		.major = major,
		.minor = minor,
	};
	int allow = 1;

	rcu_read_lock();
	cgrp = task_dfl_cgroup(current);
	allow = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx,
				   BPF_PROG_RUN);
	rcu_read_unlock();

	return !allow;
}
EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission);

static const struct bpf_func_proto *
cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_map_lookup_elem:
		return &bpf_map_lookup_elem_proto;
	case BPF_FUNC_map_update_elem:
		return &bpf_map_update_elem_proto;
	case BPF_FUNC_map_delete_elem:
		return &bpf_map_delete_elem_proto;
	case BPF_FUNC_get_current_uid_gid:
		return &bpf_get_current_uid_gid_proto;
	case BPF_FUNC_get_local_storage:
		return &bpf_get_local_storage_proto;
	case BPF_FUNC_get_current_cgroup_id:
		return &bpf_get_current_cgroup_id_proto;
	case BPF_FUNC_trace_printk:
		if (capable(CAP_SYS_ADMIN))
			return bpf_get_trace_printk_proto();
		/* fall through */
	default:
		return NULL;
	}
}

static const struct bpf_func_proto *
cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	return cgroup_base_func_proto(func_id, prog);
}
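
/* Verifier callback: device programs get read-only access to struct
 * bpf_cgroup_dev_ctx. Writes, out-of-bounds and misaligned accesses are
 * rejected; narrow (smaller than 4-byte) loads are allowed only for the
 * access_type field.
 */
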
static bool cgroup_dev_is_valid_access(int off, int size,
				       enum bpf_access_type type,
				       const struct bpf_prog *prog,
				       struct bpf_insn_access_aux *info)
{
	const int size_default = sizeof(__u32);

	if (type == BPF_WRITE)
		return false;

	if (off < 0 || off + size > sizeof(struct bpf_cgroup_dev_ctx))
		return false;
	/* The verifier guarantees that size > 0. */
	if (off % size != 0)
		return false;

	switch (off) {
	case bpf_ctx_range(struct bpf_cgroup_dev_ctx, access_type):
		bpf_ctx_record_field_size(info, size_default);
		if (!bpf_ctx_narrow_access_ok(off, size, size_default))
			return false;
		break;
	default:
		if (size != size_default)
			return false;
	}

	return true;
}

const struct bpf_prog_ops cg_dev_prog_ops = {
};

const struct bpf_verifier_ops cg_dev_verifier_ops = {
	.get_func_proto		= cgroup_dev_func_proto,
	.is_valid_access	= cgroup_dev_is_valid_access,
};

/**
 * __cgroup_bpf_run_filter_sysctl() - Run a program on sysctl
 *
 * @head: sysctl table header
 * @table: sysctl table
 * @write: sysctl is being read (= 0) or written (= 1)
 * @type: type of program to be executed
 *
 * Program is run when sysctl is being accessed, either read or written, and
 * can allow or deny such access.
 *
 * This function will return %-EPERM if an attached program is found and
 * returned value != 1 during execution. In all other cases 0 is returned.
 */
int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
				   struct ctl_table *table, int write,
				   enum bpf_attach_type type)
{
	struct bpf_sysctl_kern ctx = {
		.head = head,
		.table = table,
		.write = write,
	};
	struct cgroup *cgrp;
	int ret;

	rcu_read_lock();
	cgrp = task_dfl_cgroup(current);
	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
	rcu_read_unlock();

	return ret == 1 ? 0 : -EPERM;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sysctl);

static const struct bpf_func_proto *
sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	return cgroup_base_func_proto(func_id, prog);
}

static bool sysctl_is_valid_access(int off, int size, enum bpf_access_type type,
				   const struct bpf_prog *prog,
				   struct bpf_insn_access_aux *info)
{
	const int size_default = sizeof(__u32);

	if (off < 0 || off + size > sizeof(struct bpf_sysctl) ||
	    off % size || type != BPF_READ)
		return false;

	switch (off) {
	case offsetof(struct bpf_sysctl, write):
		bpf_ctx_record_field_size(info, size_default);
		return bpf_ctx_narrow_access_ok(off, size, size_default);
	default:
		return false;
	}
}

static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
				     const struct bpf_insn *si,
				     struct bpf_insn *insn_buf,
				     struct bpf_prog *prog, u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;

	switch (si->off) {
	case offsetof(struct bpf_sysctl, write):
		*insn++ = BPF_LDX_MEM(
			BPF_SIZE(si->code), si->dst_reg, si->src_reg,
			bpf_target_off(struct bpf_sysctl_kern, write,
				       FIELD_SIZEOF(struct bpf_sysctl_kern,
						    write),
				       target_size));
		break;
	}

	return insn - insn_buf;
}

const struct bpf_verifier_ops cg_sysctl_verifier_ops = {
	.get_func_proto		= sysctl_func_proto,
	.is_valid_access	= sysctl_is_valid_access,
	.convert_ctx_access	= sysctl_convert_ctx_access,
};

const struct bpf_prog_ops cg_sysctl_prog_ops = {
};
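
/* End to end, a sysctl filter is attached like any other cgroup program
 * (illustrative): load a BPF_PROG_TYPE_CGROUP_SYSCTL program, then
 * BPF_PROG_ATTACH it with attach_type BPF_CGROUP_SYSCTL; every sysctl
 * read or write by a task in that cgroup then funnels through
 * __cgroup_bpf_run_filter_sysctl() above.
 */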