/* SPDX-License-Identifier: GPL-2.0 */
/*
 * linux/cgroup-defs.h - basic definitions for cgroup
 *
 * This file provides basic type and interface.  Include this file directly
 * only if necessary to avoid cyclic dependencies.
 */
#ifndef _LINUX_CGROUP_DEFS_H
#define _LINUX_CGROUP_DEFS_H

#include <linux/limits.h>
#include <linux/list.h>
#include <linux/idr.h>
#include <linux/wait.h>
#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/refcount.h>
#include <linux/percpu-refcount.h>
#include <linux/percpu-rwsem.h>
#include <linux/u64_stats_sync.h>
#include <linux/workqueue.h>
#include <linux/bpf-cgroup-defs.h>
#include <linux/psi_types.h>

#ifdef CONFIG_CGROUPS

/* Forward declarations; only pointers to these types are needed below. */
struct cgroup;
struct cgroup_root;
struct cgroup_subsys;
struct cgroup_taskset;
struct kernfs_node;
struct kernfs_ops;
struct kernfs_open_file;
struct seq_file;
struct poll_table_struct;

/* Buffer sizes (in bytes) for the fixed-length name arrays declared below. */
#define MAX_CGROUP_TYPE_NAMELEN 32
#define MAX_CGROUP_ROOT_NAMELEN 64
#define MAX_CFTYPE_NAME		64

/*
 * Define the enumeration of all cgroup subsystems.
 *
 * SUBSYS(_x) expands to the enumerator "<_x>_cgrp_id,", so including
 * <linux/cgroup_subsys.h> inside the enum body (presumably a list of
 * SUBSYS() invocations) yields one id per subsystem.  CGROUP_SUBSYS_COUNT
 * is listed last and therefore equals the number of ids generated.
 */
#define SUBSYS(_x) _x ## _cgrp_id,
enum cgroup_subsys_id {
#include <linux/cgroup_subsys.h>
	CGROUP_SUBSYS_COUNT,
};
#undef SUBSYS

/* bits in struct cgroup_subsys_state flags field (bit masks) */
enum {
	CSS_NO_REF	= (1 << 0), /* no reference counting for this css */
	CSS_ONLINE	= (1 << 1), /* between ->css_online() and ->css_offline() */
	CSS_RELEASED	= (1 << 2), /* refcnt reached zero, released */
	CSS_VISIBLE	= (1 << 3), /* css is visible to userland */
	CSS_DYING	= (1 << 4), /* css is dying */
};

/*
 * bits in struct cgroup flags field
 *
 * Unlike the other flag enums in this file, these enumerators carry no
 * explicit value: they are consecutive bit numbers (0, 1, 2, ...), not
 * (1 << n) masks.
 */
enum {
	/* Control Group requires release notifications to userspace */
	CGRP_NOTIFY_ON_RELEASE,
	/*
	 * Clone the parent's configuration when creating a new child
	 * cpuset cgroup.  For historical reasons, this option can be
	 * specified at mount time and thus is implemented here.
	 */
	CGRP_CPUSET_CLONE_CHILDREN,

	/* Control group has to be frozen. */
	CGRP_FREEZE,

	/* Cgroup is frozen. */
	CGRP_FROZEN,

	/* Control group has to be killed. */
	CGRP_KILL,
};

/* cgroup_root->flags (bit masks; note that (1 << 0) is not assigned here) */
enum {
	CGRP_ROOT_NOPREFIX	= (1 << 1), /* mounted subsystems have no named prefix */
	CGRP_ROOT_XATTR		= (1 << 2), /* supports extended attributes */

	/*
	 * Consider namespaces as delegation boundaries.  If this flag is
	 * set, controller specific interface files in a namespace root
	 * aren't writeable from inside the namespace.
	 */
	CGRP_ROOT_NS_DELEGATE	= (1 << 3),

	/*
	 * Enable cpuset controller in v1 cgroup to use v2 behavior.
	 */
	CGRP_ROOT_CPUSET_V2_MODE = (1 << 4),

	/*
	 * Enable legacy local memory.events.
	 */
	CGRP_ROOT_MEMORY_LOCAL_EVENTS = (1 << 5),

	/*
	 * Enable recursive subtree protection
	 */
	CGRP_ROOT_MEMORY_RECURSIVE_PROT = (1 << 6),
};

/* cftype->flags (bit masks) */
enum {
	CFTYPE_ONLY_ON_ROOT	= (1 << 0),	/* only create on root cgrp */
	CFTYPE_NOT_ON_ROOT	= (1 << 1),	/* don't create on root cgrp */
	CFTYPE_NS_DELEGATABLE	= (1 << 2),	/* writeable beyond delegation boundaries */

	CFTYPE_NO_PREFIX	= (1 << 3),	/* (DON'T USE FOR NEW FILES) no subsys prefix */
	CFTYPE_WORLD_WRITABLE	= (1 << 4),	/* (DON'T USE FOR NEW FILES) S_IWUGO */
	CFTYPE_DEBUG		= (1 << 5),	/* create when cgroup_debug */
	CFTYPE_PRESSURE		= (1 << 6),	/* only if pressure feature is enabled */

	/* internal flags, do not use outside cgroup core proper */
	__CFTYPE_ONLY_ON_DFL	= (1 << 16),	/* only on default hierarchy */
	__CFTYPE_NOT_ON_DFL	= (1 << 17),	/* not on default hierarchy */
};

/*
 * cgroup_file is the handle for a file instance created in a cgroup which
 * is used, for example, to generate file changed notifications.
This can be obtained by setting cftype->file_offset.
 */
struct cgroup_file {
	/* do not access any fields from outside cgroup core */
	struct kernfs_node *kn;
	unsigned long notified_at;
	struct timer_list notify_timer;
};

/*
 * Per-subsystem/per-cgroup state maintained by the system.  This is the
 * fundamental structural building block that controllers deal with.
 *
 * Fields marked with "PI:" are public and immutable and may be accessed
 * directly without synchronization.
 */
struct cgroup_subsys_state {
	/* PI: the cgroup that this css is attached to */
	struct cgroup *cgroup;

	/* PI: the cgroup subsystem that this css is attached to */
	struct cgroup_subsys *ss;

	/* reference count - access via css_[try]get() and css_put() */
	struct percpu_ref refcnt;

	/* siblings list anchored at the parent's ->children */
	struct list_head sibling;
	struct list_head children;

	/* flush target list anchored at cgrp->rstat_css_list */
	struct list_head rstat_css_node;

	/*
	 * PI: Subsys-unique ID.  0 is unused and root is always 1.  The
	 * matching css can be looked up using css_from_id().
	 */
	int id;

	/* CSS_* bit masks */
	unsigned int flags;

	/*
	 * Monotonically increasing unique serial number which defines a
	 * uniform order among all csses.  It's guaranteed that all
	 * ->children lists are in the ascending order of ->serial_nr and
	 * used to allow interrupting and resuming iterations.
	 */
	u64 serial_nr;

	/*
	 * Incremented by online self and children.  Used to guarantee that
	 * parents are not offlined before their children.
	 */
	atomic_t online_cnt;

	/* percpu_ref killing and RCU release */
	struct work_struct destroy_work;
	struct rcu_work destroy_rwork;

	/*
	 * PI: the parent css.	Placed here for cache proximity to following
	 * fields of the containing structure.
	 */
	struct cgroup_subsys_state *parent;
};

/*
 * A css_set is a structure holding pointers to a set of
 * cgroup_subsys_state objects. This saves space in the task struct
 * object and speeds up fork()/exit(), since a single inc/dec and a
 * list_add()/del() can bump the reference count on the entire cgroup
 * set for a task.
 */
struct css_set {
	/*
	 * Set of subsystem states, one for each subsystem. This array is
	 * immutable after creation apart from the init_css_set during
	 * subsystem registration (at boot time).
	 */
	struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];

	/* reference count */
	refcount_t refcount;

	/*
	 * For a domain cgroup, the following points to self.  If threaded,
	 * to the matching cset of the nearest domain ancestor.  The
	 * dom_cset provides access to the domain cgroup and its csses to
	 * which domain level resource consumptions should be charged.
	 */
	struct css_set *dom_cset;

	/* the default cgroup associated with this css_set */
	struct cgroup *dfl_cgrp;

	/* internal task count, protected by css_set_lock */
	int nr_tasks;

	/*
	 * Lists running through all tasks using this cgroup group.
	 * mg_tasks lists tasks which belong to this cset but are in the
	 * process of being migrated out or in.  Protected by
	 * css_set_rwsem, but, during migration, once tasks are moved to
	 * mg_tasks, it can be read safely while holding cgroup_mutex.
	 *
	 * NOTE(review): "css_set_rwsem" looks like a stale name; every
	 * other comment in this struct names css_set_lock - confirm.
	 */
	struct list_head tasks;
	struct list_head mg_tasks;
	struct list_head dying_tasks;

	/* all css_task_iters currently walking this cset */
	struct list_head task_iters;

	/*
	 * On the default hierarchy, ->subsys[ssid] may point to a css
	 * attached to an ancestor instead of the cgroup this css_set is
	 * associated with.  The following node is anchored at
	 * ->subsys[ssid]->cgroup->e_csets[ssid] and provides a way to
	 * iterate through all css's attached to a given cgroup.
	 */
	struct list_head e_cset_node[CGROUP_SUBSYS_COUNT];

	/* all threaded csets whose ->dom_cset points to this cset */
	struct list_head threaded_csets;
	struct list_head threaded_csets_node;

	/*
	 * List running through all cgroup groups in the same hash
	 * slot. Protected by css_set_lock
	 */
	struct hlist_node hlist;

	/*
	 * List of cgrp_cset_links pointing at cgroups referenced from this
	 * css_set.  Protected by css_set_lock.
	 */
	struct list_head cgrp_links;

	/*
	 * List of csets participating in the on-going migration either as
	 * source or destination.  Protected by cgroup_mutex.
	 */
	struct list_head mg_preload_node;
	struct list_head mg_node;

	/*
	 * If this cset is acting as the source of migration the following
	 * two fields are set.  mg_src_cgrp and mg_dst_cgrp are
	 * respectively the source and destination cgroups of the on-going
	 * migration.  mg_dst_cset is the destination cset the target tasks
	 * on this cset should be migrated to.  Protected by cgroup_mutex.
	 */
	struct cgroup *mg_src_cgrp;
	struct cgroup *mg_dst_cgrp;
	struct css_set *mg_dst_cset;

	/* dead and being drained, ignore for migration */
	bool dead;

	/* For RCU-protected deletion */
	struct rcu_head rcu_head;
};

/* Basic resource statistics; at present this is CPU time only. */
struct cgroup_base_stat {
	struct task_cputime cputime;
};

/*
 * rstat - cgroup scalable recursive statistics.  Accounting is done
 * per-cpu in cgroup_rstat_cpu which is then lazily propagated up the
 * hierarchy on reads.
 *
 * When a stat gets updated, the cgroup_rstat_cpu and its ancestors are
 * linked into the updated tree.  On the following read, propagation only
 * considers and consumes the updated tree.
This makes reading O(the
 * number of descendants which have been active since last read) instead of
 * O(the total number of descendants).
 *
 * This is important because there can be a lot of (draining) cgroups which
 * aren't active and stat may be read frequently.  The combination can
 * become very expensive.  By propagating selectively, increasing reading
 * frequency decreases the cost of each read.
 *
 * This struct hosts both the fields which implement the above -
 * updated_children and updated_next - and the fields which track basic
 * resource statistics on top of it - bsync, bstat and last_bstat.
 */
struct cgroup_rstat_cpu {
	/*
	 * ->bsync protects ->bstat.  These are the only fields which get
	 * updated in the hot path.
	 */
	struct u64_stats_sync bsync;
	struct cgroup_base_stat bstat;

	/*
	 * Snapshots at the last reading.  These are used to calculate the
	 * deltas to propagate to the global counters.
	 */
	struct cgroup_base_stat last_bstat;

	/*
	 * Child cgroups with stat updates on this cpu since the last read
	 * are linked on the parent's ->updated_children through
	 * ->updated_next.
	 *
	 * In addition to being more compact, singly-linked list pointing
	 * to the cgroup makes it unnecessary for each per-cpu struct to
	 * point back to the associated cgroup.
	 *
	 * Protected by per-cpu cgroup_rstat_cpu_lock.
	 */
	struct cgroup *updated_children;	/* terminated by self cgroup */
	struct cgroup *updated_next;		/* NULL iff not on the list */
};

/* Freezer bookkeeping; embedded in struct cgroup as ->freezer. */
struct cgroup_freezer_state {
	/* Should the cgroup and its descendants be frozen. */
	bool freeze;

	/* Should the cgroup actually be frozen? */
	int e_freeze;

	/* Fields below are protected by css_set_lock */

	/* Number of frozen descendant cgroups */
	int nr_frozen_descendants;

	/*
	 * Number of tasks, which are counted as frozen:
	 * frozen, SIGSTOPped, and PTRACEd.
	 */
	int nr_frozen_tasks;
};

/*
 * A single cgroup, i.e. one node of a cgroup hierarchy.
 *
 * The structure ends in the flexible array member ancestor_ids[], so any
 * new member must be added before it.
 */
struct cgroup {
	/* self css with NULL ->ss, points back to this cgroup */
	struct cgroup_subsys_state self;

	unsigned long flags;		/* "unsigned long" so bitops work */

	/*
	 * The depth this cgroup is at.  The root is at depth zero and each
	 * step down the hierarchy increments the level.  This along with
	 * ancestor_ids[] can determine whether a given cgroup is a
	 * descendant of another without traversing the hierarchy.
	 */
	int level;

	/* Maximum allowed descent tree depth */
	int max_depth;

	/*
	 * Keep track of total numbers of visible and dying descendant
	 * cgroups.  Dying cgroups are cgroups which were deleted by a user,
	 * but are still existing because someone else is holding a reference.
	 * max_descendants is a maximum allowed number of descendant cgroups.
	 *
	 * nr_descendants and nr_dying_descendants are protected
	 * by cgroup_mutex and css_set_lock. It's fine to read them holding
	 * any of cgroup_mutex and css_set_lock; for writing both locks
	 * should be held.
	 */
	int nr_descendants;
	int nr_dying_descendants;
	int max_descendants;

	/*
	 * Each non-empty css_set associated with this cgroup contributes
	 * one to nr_populated_csets.  The counter is zero iff this cgroup
	 * doesn't have any tasks.
	 *
	 * All children which have non-zero nr_populated_csets and/or
	 * nr_populated_children of their own contribute one to either
	 * nr_populated_domain_children or nr_populated_threaded_children
	 * depending on their type.  Each counter is zero iff all cgroups
	 * of the type in the subtree proper don't have any tasks.
	 */
	int nr_populated_csets;
	int nr_populated_domain_children;
	int nr_populated_threaded_children;

	int nr_threaded_children;	/* # of live threaded child cgroups */

	struct kernfs_node *kn;		/* cgroup kernfs entry */
	struct cgroup_file procs_file;	/* handle for "cgroup.procs" */
	struct cgroup_file events_file;	/* handle for "cgroup.events" */

	/*
	 * The bitmask of subsystems enabled on the child cgroups.
	 * ->subtree_control is the one configured through
	 * "cgroup.subtree_control" while ->subtree_ss_mask is the effective
	 * one which may have more subsystems enabled.  Controller knobs
	 * are made available iff it's enabled in ->subtree_control.
	 */
	u16 subtree_control;
	u16 subtree_ss_mask;
	u16 old_subtree_control;
	u16 old_subtree_ss_mask;

	/* Private pointers for each registered subsystem */
	struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT];

	struct cgroup_root *root;

	/*
	 * List of cgrp_cset_links pointing at css_sets with tasks in this
	 * cgroup.  Protected by css_set_lock.
	 */
	struct list_head cset_links;

	/*
	 * On the default hierarchy, a css_set for a cgroup with some
	 * subsys disabled will point to css's which are associated with
	 * the closest ancestor which has the subsys enabled.  The
	 * following lists all css_sets which point to this cgroup's css
	 * for the given subsystem.
	 */
	struct list_head e_csets[CGROUP_SUBSYS_COUNT];

	/*
	 * If !threaded, self.  If threaded, it points to the nearest
	 * domain ancestor.  Inside a threaded subtree, cgroups are exempt
	 * from process granularity and no-internal-task constraint.
	 * Domain level resource consumptions which aren't tied to a
	 * specific task are charged to the dom_cgrp.
	 */
	struct cgroup *dom_cgrp;
	struct cgroup *old_dom_cgrp;		/* used while enabling threaded */

	/* per-cpu recursive resource statistics */
	struct cgroup_rstat_cpu __percpu *rstat_cpu;
	struct list_head rstat_css_list;

	/* cgroup basic resource statistics */
	struct cgroup_base_stat last_bstat;
	struct cgroup_base_stat bstat;
	struct prev_cputime prev_cputime;	/* for printing out cputime */

	/*
	 * list of pidlists, up to two for each namespace (one for procs, one
	 * for tasks); created on demand.
	 */
	struct list_head pidlists;
	struct mutex pidlist_mutex;

	/* used to wait for offlining of csses */
	wait_queue_head_t offline_waitq;

	/* used to schedule release agent */
	struct work_struct release_agent_work;

	/* used to track pressure stalls */
	struct psi_group psi;

	/* used to store eBPF programs */
	struct cgroup_bpf bpf;

	/* If there is block congestion on this cgroup. */
	atomic_t congestion_count;

	/* Used to store internal freezer state */
	struct cgroup_freezer_state freezer;

	/* ids of the ancestors at each level including self */
	u64 ancestor_ids[];
};

/*
 * A cgroup_root represents the root of a cgroup hierarchy, and may be
 * associated with a kernfs_root to form an active hierarchy.  This is
 * internal to cgroup core.  Don't access directly from controllers.
 */
struct cgroup_root {
	struct kernfs_root *kf_root;

	/* The bitmask of subsystems attached to this hierarchy */
	unsigned int subsys_mask;

	/* Unique id for this hierarchy. */
	int hierarchy_id;

	/* The root cgroup.  Root is destroyed on its release. */
	struct cgroup cgrp;

	/*
	 * for cgrp->ancestor_ids[0]
	 *
	 * cgrp ends in a flexible array member, so this storage has to
	 * stay directly after it.
	 */
	u64 cgrp_ancestor_id_storage;

	/* Number of cgroups in the hierarchy, used only for /proc/cgroups */
	atomic_t nr_cgrps;

	/* A list running through the active hierarchies */
	struct list_head root_list;

	/* Hierarchy-specific flags */
	unsigned int flags;

	/* The path to use for release notifications. */
	char release_agent_path[PATH_MAX];

	/* The name for this hierarchy - may be empty */
	char name[MAX_CGROUP_ROOT_NAMELEN];
};

/*
 * struct cftype: handler definitions for cgroup control files
 *
 * When reading/writing to a file:
 *	- the cgroup to use is file->f_path.dentry->d_parent->d_fsdata
 *	- the 'cftype' of the file is file->f_path.dentry->d_fsdata
 */
struct cftype {
	/*
	 * By convention, the name should begin with the name of the
	 * subsystem, followed by a period.  Zero length string indicates
	 * end of cftype array.
	 */
	char name[MAX_CFTYPE_NAME];
	unsigned long private;

	/*
	 * The maximum length of string, excluding trailing nul, that can
	 * be passed to write.  If < PAGE_SIZE-1, PAGE_SIZE-1 is assumed.
	 */
	size_t max_write_len;

	/* CFTYPE_* flags */
	unsigned int flags;

	/*
	 * If non-zero, should contain the offset from the start of css to
	 * a struct cgroup_file field.  cgroup will record the handle of
	 * the created file into it.  The recorded handle can be used as
	 * long as the containing css remains accessible.
	 */
	unsigned int file_offset;

	/*
	 * Fields used for internal bookkeeping.  Initialized automatically
	 * during registration.
	 */
	struct cgroup_subsys *ss;	/* NULL for cgroup core files */
	struct list_head node;		/* anchored at ss->cfts */
	struct kernfs_ops *kf_ops;

	int (*open)(struct kernfs_open_file *of);
	void (*release)(struct kernfs_open_file *of);

	/*
	 * read_u64() is a shortcut for the common case of returning a
	 * single integer. Use it in place of read()
	 */
	u64 (*read_u64)(struct cgroup_subsys_state *css, struct cftype *cft);
	/*
	 * read_s64() is a signed version of read_u64()
	 */
	s64 (*read_s64)(struct cgroup_subsys_state *css, struct cftype *cft);

	/* generic seq_file read interface */
	int (*seq_show)(struct seq_file *sf, void *v);

	/* optional ops, implement all or none */
	void *(*seq_start)(struct seq_file *sf, loff_t *ppos);
	void *(*seq_next)(struct seq_file *sf, void *v, loff_t *ppos);
	void (*seq_stop)(struct seq_file *sf, void *v);

	/*
	 * write_u64() is a shortcut for the common case of accepting
	 * a single integer (as parsed by simple_strtoull) from
	 * userspace. Use in place of write(); return 0 or error.
	 */
	int (*write_u64)(struct cgroup_subsys_state *css, struct cftype *cft,
			 u64 val);
	/*
	 * write_s64() is a signed version of write_u64()
	 */
	int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft,
			 s64 val);

	/*
	 * write() is the generic write callback which maps directly to
	 * kernfs write operation and overrides all other operations.
	 * Maximum write size is determined by ->max_write_len.  Use
	 * of_css/cft() to access the associated css and cft.
	 */
	ssize_t (*write)(struct kernfs_open_file *of,
			 char *buf, size_t nbytes, loff_t off);

	__poll_t (*poll)(struct kernfs_open_file *of,
			 struct poll_table_struct *pt);

	/* present only in CONFIG_DEBUG_LOCK_ALLOC builds */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	struct lock_class_key	lockdep_key;
#endif
};

/*
 * Control Group subsystem type.
 * See Documentation/admin-guide/cgroup-v1/cgroups.rst for details
 */
struct cgroup_subsys {
	/* css lifecycle and statistics callbacks */
	struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css);
	int (*css_online)(struct cgroup_subsys_state *css);
	void (*css_offline)(struct cgroup_subsys_state *css);
	void (*css_released)(struct cgroup_subsys_state *css);
	void (*css_free)(struct cgroup_subsys_state *css);
	void (*css_reset)(struct cgroup_subsys_state *css);
	void (*css_rstat_flush)(struct cgroup_subsys_state *css, int cpu);
	int (*css_extra_stat_show)(struct seq_file *seq,
				   struct cgroup_subsys_state *css);

	/* task attach (migration), fork, exit and release callbacks */
	int (*can_attach)(struct cgroup_taskset *tset);
	void (*cancel_attach)(struct cgroup_taskset *tset);
	void (*attach)(struct cgroup_taskset *tset);
	void (*post_attach)(void);
	int (*can_fork)(struct task_struct *task,
			struct css_set *cset);
	void (*cancel_fork)(struct task_struct *task, struct css_set *cset);
	void (*fork)(struct task_struct *task);
	void (*exit)(struct task_struct *task);
	void (*release)(struct task_struct *task);
	void (*bind)(struct cgroup_subsys_state *root_css);

	bool early_init:1;

	/*
	 * If %true, the controller, on the default hierarchy, doesn't show
	 * up in "cgroup.controllers" or "cgroup.subtree_control", is
	 * implicitly enabled on all cgroups on the default hierarchy, and
	 * bypasses the "no internal process" constraint.  This is for
	 * utility type controllers which are transparent to userland.
	 *
	 * An implicit controller can be stolen from the default hierarchy
	 * anytime and thus must be okay with offline csses from previous
	 * hierarchies coexisting with csses for the current one.
	 */
	bool implicit_on_dfl:1;

	/*
	 * If %true, the controller supports threaded mode on the default
	 * hierarchy.  In a threaded subtree, both process granularity and
	 * no-internal-process constraint are ignored and a threaded
	 * controller should be able to handle that.
	 *
	 * Note that as an implicit controller is automatically enabled on
	 * all cgroups on the default hierarchy, it should also be
	 * threaded.  implicit && !threaded is not supported.
	 */
	bool threaded:1;

	/* the following two fields are initialized automatically during boot */
	int id;
	const char *name;

	/* optional, initialized automatically during boot if not set */
	const char *legacy_name;

	/* link to parent, protected by cgroup_lock() */
	struct cgroup_root *root;

	/* idr for css->id */
	struct idr css_idr;

	/*
	 * List of cftypes.  Each entry is the first entry of an array
	 * terminated by zero length name.
	 */
	struct list_head cfts;

	/*
	 * Base cftypes which are automatically registered.  The two can
	 * point to the same array.
	 */
	struct cftype *dfl_cftypes;	/* for the default hierarchy */
	struct cftype *legacy_cftypes;	/* for the legacy hierarchies */

	/*
	 * A subsystem may depend on other subsystems.  When such subsystem
	 * is enabled on a cgroup, the depended-upon subsystems are enabled
	 * together if available.  Subsystems enabled due to dependency are
	 * not visible to userland until explicitly enabled.  The following
	 * specifies the mask of subsystems that this one depends on.
	 */
	unsigned int depends_on;
};

/* read-locked by cgroup_threadgroup_change_begin(), released by _end() */
extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem;

/**
 * cgroup_threadgroup_change_begin - threadgroup exclusion for cgroups
 * @tsk: target task
 *
 * Allows cgroup operations to synchronize against threadgroup changes
 * using a percpu_rw_semaphore.
 */
static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk)
{
	/* @tsk is not referenced; the semaphore taken here is a single global */
	percpu_down_read(&cgroup_threadgroup_rwsem);
}

/**
 * cgroup_threadgroup_change_end - threadgroup exclusion for cgroups
 * @tsk: target task
 *
 * Counterpart of cgroup_threadgroup_change_begin().
 */
static inline void cgroup_threadgroup_change_end(struct task_struct *tsk)
{
	/* drops the read side taken by cgroup_threadgroup_change_begin() */
	percpu_up_read(&cgroup_threadgroup_rwsem);
}

#else	/* CONFIG_CGROUPS */

/* Without cgroup support there are no subsystems to enumerate. */
#define CGROUP_SUBSYS_COUNT 0

/*
 * Stub for !CONFIG_CGROUPS builds: takes no lock, but still calls
 * might_sleep() like a caller of the sleeping variant above would expect.
 */
static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk)
{
	might_sleep();
}

/* Stub for !CONFIG_CGROUPS builds: nothing to release. */
static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {}

#endif	/* CONFIG_CGROUPS */

#ifdef CONFIG_SOCK_CGROUP_DATA

/*
 * sock_cgroup_data is embedded at sock->sk_cgrp_data and contains
 * per-socket cgroup information except for memcg association.
 *
 * On legacy hierarchies, net_prio and net_cls controllers directly
 * set attributes on each sock which can then be tested by the network
 * layer. On the default hierarchy, each sock is associated with the
 * cgroup it was created in and the networking layer can match the
 * cgroup directly.
760 */ 761 struct sock_cgroup_data { 762 struct cgroup *cgroup; /* v2 */ 763 #ifdef CONFIG_CGROUP_NET_CLASSID 764 u32 classid; /* v1 */ 765 #endif 766 #ifdef CONFIG_CGROUP_NET_PRIO 767 u16 prioidx; /* v1 */ 768 #endif 769 }; 770 771 static inline u16 sock_cgroup_prioidx(const struct sock_cgroup_data *skcd) 772 { 773 #ifdef CONFIG_CGROUP_NET_PRIO 774 return READ_ONCE(skcd->prioidx); 775 #else 776 return 1; 777 #endif 778 } 779 780 static inline u32 sock_cgroup_classid(const struct sock_cgroup_data *skcd) 781 { 782 #ifdef CONFIG_CGROUP_NET_CLASSID 783 return READ_ONCE(skcd->classid); 784 #else 785 return 0; 786 #endif 787 } 788 789 static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd, 790 u16 prioidx) 791 { 792 #ifdef CONFIG_CGROUP_NET_PRIO 793 WRITE_ONCE(skcd->prioidx, prioidx); 794 #endif 795 } 796 797 static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd, 798 u32 classid) 799 { 800 #ifdef CONFIG_CGROUP_NET_CLASSID 801 WRITE_ONCE(skcd->classid, classid); 802 #endif 803 } 804 805 #else /* CONFIG_SOCK_CGROUP_DATA */ 806 807 struct sock_cgroup_data { 808 }; 809 810 #endif /* CONFIG_SOCK_CGROUP_DATA */ 811 812 #endif /* _LINUX_CGROUP_DEFS_H */ 813