#ifndef _LINUX_CGROUP_H
#define _LINUX_CGROUP_H
/*
 *  cgroup interface
 *
 *  Copyright (C) 2003 BULL SA
 *  Copyright (C) 2004-2006 Silicon Graphics, Inc.
 *
 */

#include <linux/sched.h>
#include <linux/cpumask.h>
#include <linux/nodemask.h>
#include <linux/rcupdate.h>
#include <linux/rculist.h>
#include <linux/cgroupstats.h>
#include <linux/rwsem.h>
#include <linux/idr.h>
#include <linux/workqueue.h>
#include <linux/fs.h>
#include <linux/percpu-refcount.h>
#include <linux/seq_file.h>
#include <linux/kernfs.h>
#include <linux/wait.h>

#ifdef CONFIG_CGROUPS

struct cgroup_root;
struct cgroup_subsys;
struct inode;
struct cgroup;

/* cgroup core entry points called from boot, fork and exit paths */
extern int cgroup_init_early(void);
extern int cgroup_init(void);
extern void cgroup_fork(struct task_struct *p);
extern void cgroup_post_fork(struct task_struct *p);
extern void cgroup_exit(struct task_struct *p);
extern int cgroupstats_build(struct cgroupstats *stats,
			     struct dentry *dentry);

extern int proc_cgroup_show(struct seq_file *, void *);

/* define the enumeration of all cgroup subsystems */
#define SUBSYS(_x) _x ## _cgrp_id,
enum cgroup_subsys_id {
#include <linux/cgroup_subsys.h>
	CGROUP_SUBSYS_COUNT,
};
#undef SUBSYS

/*
 * Per-subsystem/per-cgroup state maintained by the system.  This is the
 * fundamental structural building block that controllers deal with.
 *
 * Fields marked with "PI:" are public and immutable and may be accessed
 * directly without synchronization.
 */
struct cgroup_subsys_state {
	/* PI: the cgroup that this css is attached to */
	struct cgroup *cgroup;

	/* PI: the cgroup subsystem that this css is attached to */
	struct cgroup_subsys *ss;

	/* reference count - access via css_[try]get() and css_put() */
	struct percpu_ref refcnt;

	/* PI: the parent css */
	struct cgroup_subsys_state *parent;

	/* siblings list anchored at the parent's ->children */
	struct list_head sibling;
	struct list_head children;

	/*
	 * PI: Subsys-unique ID.  0 is unused and root is always 1.  The
	 * matching css can be looked up using css_from_id().
	 */
	int id;

	/* CSS_* flags below */
	unsigned int flags;

	/*
	 * Monotonically increasing unique serial number which defines a
	 * uniform order among all csses.  It's guaranteed that all
	 * ->children lists are in the ascending order of ->serial_nr and
	 * used to allow interrupting and resuming iterations.
	 */
	u64 serial_nr;

	/* percpu_ref killing and RCU release */
	struct rcu_head rcu_head;
	struct work_struct destroy_work;
};

/* bits in struct cgroup_subsys_state flags field */
enum {
	CSS_NO_REF	= (1 << 0), /* no reference counting for this css */
	CSS_ONLINE	= (1 << 1), /* between ->css_online() and ->css_offline() */
	CSS_RELEASED	= (1 << 2), /* refcnt reached zero, released */
};

/**
 * css_get - obtain a reference on the specified css
 * @css: target css
 *
 * The caller must already have a reference.
 */
static inline void css_get(struct cgroup_subsys_state *css)
{
	if (!(css->flags & CSS_NO_REF))
		percpu_ref_get(&css->refcnt);
}

/**
 * css_tryget - try to obtain a reference on the specified css
 * @css: target css
 *
 * Obtain a reference on @css unless it already has reached zero and is
 * being released.  This function doesn't care whether @css is on or
 * offline.
 * The caller naturally needs to ensure that @css is accessible
 * but doesn't have to be holding a reference on it - IOW, RCU protected
 * access is good enough for this function.  Returns %true if a reference
 * count was successfully obtained; %false otherwise.
 */
static inline bool css_tryget(struct cgroup_subsys_state *css)
{
	if (!(css->flags & CSS_NO_REF))
		return percpu_ref_tryget(&css->refcnt);
	return true;
}

/**
 * css_tryget_online - try to obtain a reference on the specified css if online
 * @css: target css
 *
 * Obtain a reference on @css if it's online.  The caller naturally needs
 * to ensure that @css is accessible but doesn't have to be holding a
 * reference on it - IOW, RCU protected access is good enough for this
 * function.  Returns %true if a reference count was successfully obtained;
 * %false otherwise.
 */
static inline bool css_tryget_online(struct cgroup_subsys_state *css)
{
	if (!(css->flags & CSS_NO_REF))
		return percpu_ref_tryget_live(&css->refcnt);
	return true;
}

/**
 * css_put - put a css reference
 * @css: target css
 *
 * Put a reference obtained via css_get() and css_tryget_online().
 */
static inline void css_put(struct cgroup_subsys_state *css)
{
	if (!(css->flags & CSS_NO_REF))
		percpu_ref_put(&css->refcnt);
}

/* bits in struct cgroup flags field */
enum {
	/*
	 * Control Group has previously had a child cgroup or a task,
	 * but no longer (only if CGRP_NOTIFY_ON_RELEASE is set)
	 */
	CGRP_RELEASABLE,
	/* Control Group requires release notifications to userspace */
	CGRP_NOTIFY_ON_RELEASE,
	/*
	 * Clone the parent's configuration when creating a new child
	 * cpuset cgroup.  For historical reasons, this option can be
	 * specified at mount time and thus is implemented here.
	 */
	CGRP_CPUSET_CLONE_CHILDREN,
};

struct cgroup {
	/* self css with NULL ->ss, points back to this cgroup */
	struct cgroup_subsys_state self;

	unsigned long flags;		/* "unsigned long" so bitops work */

	/*
	 * idr allocated in-hierarchy ID.
	 *
	 * ID 0 is not used, the ID of the root cgroup is always 1, and a
	 * new cgroup will be assigned with a smallest available ID.
	 *
	 * Allocating/Removing ID must be protected by cgroup_mutex.
	 */
	int id;

	/*
	 * If this cgroup contains any tasks, it contributes one to
	 * populated_cnt.  All children with non-zero populated_cnt of
	 * their own contribute one.  The count is zero iff there's no task
	 * in this cgroup or its subtree.
	 */
	int populated_cnt;

	struct kernfs_node *kn;		/* cgroup kernfs entry */
	struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */

	/*
	 * The bitmask of subsystems enabled on the child cgroups.
	 * ->subtree_control is the one configured through
	 * "cgroup.subtree_control" while ->child_subsys_mask is the
	 * effective one which may have more subsystems enabled.
	 * Controller knobs are made available iff it's enabled in
	 * ->subtree_control.
	 */
	unsigned int subtree_control;
	unsigned int child_subsys_mask;

	/* Private pointers for each registered subsystem */
	struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT];

	struct cgroup_root *root;

	/*
	 * List of cgrp_cset_links pointing at css_sets with tasks in this
	 * cgroup.  Protected by css_set_lock.
	 */
	struct list_head cset_links;

	/*
	 * On the default hierarchy, a css_set for a cgroup with some
	 * subsys disabled will point to css's which are associated with
	 * the closest ancestor which has the subsys enabled.  The
	 * following lists all css_sets which point to this cgroup's css
	 * for the given subsystem.
	 */
	struct list_head e_csets[CGROUP_SUBSYS_COUNT];

	/*
	 * Linked list running through all cgroups that can
	 * potentially be reaped by the release agent.  Protected by
	 * release_list_lock
	 */
	struct list_head release_list;

	/*
	 * list of pidlists, up to two for each namespace (one for procs, one
	 * for tasks); created on demand.
	 */
	struct list_head pidlists;
	struct mutex pidlist_mutex;

	/* used to wait for offlining of csses */
	wait_queue_head_t offline_waitq;
};

#define MAX_CGROUP_ROOT_NAMELEN 64

/* cgroup_root->flags */
enum {
	CGRP_ROOT_SANE_BEHAVIOR	= (1 << 0), /* __DEVEL__sane_behavior specified */
	CGRP_ROOT_NOPREFIX	= (1 << 1), /* mounted subsystems have no named prefix */
	CGRP_ROOT_XATTR		= (1 << 2), /* supports extended attributes */
};

/*
 * A cgroup_root represents the root of a cgroup hierarchy, and may be
 * associated with a kernfs_root to form an active hierarchy.  This is
 * internal to cgroup core.  Don't access directly from controllers.
 */
struct cgroup_root {
	struct kernfs_root *kf_root;

	/* The bitmask of subsystems attached to this hierarchy */
	unsigned int subsys_mask;

	/* Unique id for this hierarchy. */
	int hierarchy_id;

	/* The root cgroup.  Root is destroyed on its release. */
	struct cgroup cgrp;

	/* Number of cgroups in the hierarchy, used only for /proc/cgroups */
	atomic_t nr_cgrps;

	/* A list running through the active hierarchies */
	struct list_head root_list;

	/* Hierarchy-specific flags */
	unsigned int flags;

	/* IDs for cgroups in this hierarchy */
	struct idr cgroup_idr;

	/* The path to use for release notifications.
	 */
	char release_agent_path[PATH_MAX];

	/* The name for this hierarchy - may be empty */
	char name[MAX_CGROUP_ROOT_NAMELEN];
};

/*
 * A css_set is a structure holding pointers to a set of
 * cgroup_subsys_state objects.  This saves space in the task struct
 * object and speeds up fork()/exit(), since a single inc/dec and a
 * list_add()/del() can bump the reference count on the entire cgroup
 * set for a task.
 */

struct css_set {

	/* Reference count */
	atomic_t refcount;

	/*
	 * List running through all cgroup groups in the same hash
	 * slot.  Protected by css_set_lock
	 */
	struct hlist_node hlist;

	/*
	 * Lists running through all tasks using this cgroup group.
	 * mg_tasks lists tasks which belong to this cset but are in the
	 * process of being migrated out or in.  Protected by
	 * css_set_rwsem, but, during migration, once tasks are moved to
	 * mg_tasks, it can be read safely while holding cgroup_mutex.
	 */
	struct list_head tasks;
	struct list_head mg_tasks;

	/*
	 * List of cgrp_cset_links pointing at cgroups referenced from this
	 * css_set.  Protected by css_set_lock.
	 */
	struct list_head cgrp_links;

	/* the default cgroup associated with this css_set */
	struct cgroup *dfl_cgrp;

	/*
	 * Set of subsystem states, one for each subsystem.  This array is
	 * immutable after creation apart from the init_css_set during
	 * subsystem registration (at boot time).
	 */
	struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];

	/*
	 * List of csets participating in the on-going migration either as
	 * source or destination.  Protected by cgroup_mutex.
	 */
	struct list_head mg_preload_node;
	struct list_head mg_node;

	/*
	 * If this cset is acting as the source of migration the following
	 * two fields are set.
	 * mg_src_cgrp is the source cgroup of the
	 * on-going migration and mg_dst_cset is the destination cset the
	 * target tasks on this cset should be migrated to.  Protected by
	 * cgroup_mutex.
	 */
	struct cgroup *mg_src_cgrp;
	struct css_set *mg_dst_cset;

	/*
	 * On the default hierarchy, ->subsys[ssid] may point to a css
	 * attached to an ancestor instead of the cgroup this css_set is
	 * associated with.  The following node is anchored at
	 * ->subsys[ssid]->cgroup->e_csets[ssid] and provides a way to
	 * iterate through all css's attached to a given cgroup.
	 */
	struct list_head e_cset_node[CGROUP_SUBSYS_COUNT];

	/* For RCU-protected deletion */
	struct rcu_head rcu_head;
};

/*
 * struct cftype: handler definitions for cgroup control files
 *
 * When reading/writing to a file:
 *	- the cgroup to use is file->f_dentry->d_parent->d_fsdata
 *	- the 'cftype' of the file is file->f_dentry->d_fsdata
 */

/* cftype->flags */
enum {
	CFTYPE_ONLY_ON_ROOT	= (1 << 0),	/* only create on root cgrp */
	CFTYPE_NOT_ON_ROOT	= (1 << 1),	/* don't create on root cgrp */
	CFTYPE_NO_PREFIX	= (1 << 3),	/* (DON'T USE FOR NEW FILES) no subsys prefix */

	/* internal flags, do not use outside cgroup core proper */
	__CFTYPE_ONLY_ON_DFL	= (1 << 16),	/* only on default hierarchy */
	__CFTYPE_NOT_ON_DFL	= (1 << 17),	/* not on default hierarchy */
};

#define MAX_CFTYPE_NAME		64

struct cftype {
	/*
	 * By convention, the name should begin with the name of the
	 * subsystem, followed by a period.  Zero length string indicates
	 * end of cftype array.
	 */
	char name[MAX_CFTYPE_NAME];
	int private;
	/*
	 * If not 0, file mode is set to this value, otherwise it will
	 * be figured out automatically
	 */
	umode_t mode;

	/*
	 * The maximum length of string, excluding trailing nul, that can
	 * be passed to write.
	 * If < PAGE_SIZE-1, PAGE_SIZE-1 is assumed.
	 */
	size_t max_write_len;

	/* CFTYPE_* flags */
	unsigned int flags;

	/*
	 * Fields used for internal bookkeeping.  Initialized automatically
	 * during registration.
	 */
	struct cgroup_subsys *ss;	/* NULL for cgroup core files */
	struct list_head node;		/* anchored at ss->cfts */
	struct kernfs_ops *kf_ops;

	/*
	 * read_u64() is a shortcut for the common case of returning a
	 * single integer.  Use it in place of read()
	 */
	u64 (*read_u64)(struct cgroup_subsys_state *css, struct cftype *cft);
	/*
	 * read_s64() is a signed version of read_u64()
	 */
	s64 (*read_s64)(struct cgroup_subsys_state *css, struct cftype *cft);

	/* generic seq_file read interface */
	int (*seq_show)(struct seq_file *sf, void *v);

	/* optional ops, implement all or none */
	void *(*seq_start)(struct seq_file *sf, loff_t *ppos);
	void *(*seq_next)(struct seq_file *sf, void *v, loff_t *ppos);
	void (*seq_stop)(struct seq_file *sf, void *v);

	/*
	 * write_u64() is a shortcut for the common case of accepting
	 * a single integer (as parsed by simple_strtoull) from
	 * userspace.  Use in place of write(); return 0 or error.
	 */
	int (*write_u64)(struct cgroup_subsys_state *css, struct cftype *cft,
			 u64 val);
	/*
	 * write_s64() is a signed version of write_u64()
	 */
	int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft,
			 s64 val);

	/*
	 * write() is the generic write callback which maps directly to
	 * kernfs write operation and overrides all other operations.
	 * Maximum write size is determined by ->max_write_len.  Use
	 * of_css/cft() to access the associated css and cft.
	 */
	ssize_t (*write)(struct kernfs_open_file *of,
			 char *buf, size_t nbytes, loff_t off);

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	struct lock_class_key	lockdep_key;
#endif
};

extern struct cgroup_root cgrp_dfl_root;
extern struct css_set init_css_set;

/**
 * cgroup_on_dfl - test whether a cgroup is on the default hierarchy
 * @cgrp: the cgroup of interest
 *
 * The default hierarchy is the v2 interface of cgroup and this function
 * can be used to test whether a cgroup is on the default hierarchy for
 * cases where a subsystem should behave differently depending on the
 * interface version.
 *
 * The set of behaviors which change on the default hierarchy are still
 * being determined and the mount option is prefixed with __DEVEL__.
 *
 * List of changed behaviors:
 *
 * - Mount options "noprefix", "xattr", "clone_children", "release_agent"
 *   and "name" are disallowed.
 *
 * - When mounting an existing superblock, mount options should match.
 *
 * - Remount is disallowed.
 *
 * - rename(2) is disallowed.
 *
 * - "tasks" is removed.  Everything should be at process granularity.  Use
 *   "cgroup.procs" instead.
 *
 * - "cgroup.procs" is not sorted.  pids will be unique unless they got
 *   recycled in between reads.
 *
 * - "release_agent" and "notify_on_release" are removed.  Replacement
 *   notification mechanism will be implemented.
 *
 * - "cgroup.clone_children" is removed.
 *
 * - "cgroup.subtree_populated" is available.  Its value is 0 if the cgroup
 *   and its descendants contain no task; otherwise, 1.  The file also
 *   generates kernfs notification which can be monitored through poll and
 *   [di]notify when the value of the file changes.
 *
 * - cpuset: tasks will be kept in empty cpusets when hotplug happens and
 *   take masks of ancestors with non-empty cpus/mems, instead of being
 *   moved to an ancestor.
 *
 * - cpuset: a task can be moved into an empty cpuset, and again it takes
 *   masks of ancestors.
 *
 * - memcg: use_hierarchy is on by default and the cgroup file for the flag
 *   is not created.
 *
 * - blkcg: blk-throttle becomes properly hierarchical.
 *
 * - debug: disallowed on the default hierarchy.
 */
static inline bool cgroup_on_dfl(const struct cgroup *cgrp)
{
	return cgrp->root == &cgrp_dfl_root;
}

/* no synchronization, the result can only be used as a hint */
static inline bool cgroup_has_tasks(struct cgroup *cgrp)
{
	return !list_empty(&cgrp->cset_links);
}

/* returns ino associated with a cgroup, 0 indicates unmounted root */
static inline ino_t cgroup_ino(struct cgroup *cgrp)
{
	if (cgrp->kn)
		return cgrp->kn->ino;
	else
		return 0;
}

/* cft/css accessors for cftype->write() operation */
static inline struct cftype *of_cft(struct kernfs_open_file *of)
{
	return of->kn->priv;
}

struct cgroup_subsys_state *of_css(struct kernfs_open_file *of);

/* cft/css accessors for cftype->seq_*() operations */
static inline struct cftype *seq_cft(struct seq_file *seq)
{
	return of_cft(seq->private);
}

static inline struct cgroup_subsys_state *seq_css(struct seq_file *seq)
{
	return of_css(seq->private);
}

/*
 * Name / path handling functions.  All are thin wrappers around the kernfs
 * counterparts and can be called under any context.
 */

static inline int cgroup_name(struct cgroup *cgrp, char *buf, size_t buflen)
{
	return kernfs_name(cgrp->kn, buf, buflen);
}

static inline char * __must_check cgroup_path(struct cgroup *cgrp, char *buf,
					      size_t buflen)
{
	return kernfs_path(cgrp->kn, buf, buflen);
}

static inline void pr_cont_cgroup_name(struct cgroup *cgrp)
{
	pr_cont_kernfs_name(cgrp->kn);
}

static inline void pr_cont_cgroup_path(struct cgroup *cgrp)
{
	pr_cont_kernfs_path(cgrp->kn);
}

char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);

/* cftype registration/removal, split by hierarchy type */
int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
int cgroup_rm_cftypes(struct cftype *cfts);

bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor);

/*
 * Control Group taskset, used to pass around set of tasks to cgroup_subsys
 * methods.
 */
struct cgroup_taskset;
struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset);
struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset);

/**
 * cgroup_taskset_for_each - iterate cgroup_taskset
 * @task: the loop cursor
 * @tset: taskset to iterate
 */
#define cgroup_taskset_for_each(task, tset)				\
	for ((task) = cgroup_taskset_first((tset)); (task);		\
	     (task) = cgroup_taskset_next((tset)))

/*
 * Control Group subsystem type.
 * See Documentation/cgroups/cgroups.txt for details
 */

struct cgroup_subsys {
	/* css lifecycle callbacks, invoked by cgroup core */
	struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css);
	int (*css_online)(struct cgroup_subsys_state *css);
	void (*css_offline)(struct cgroup_subsys_state *css);
	void (*css_free)(struct cgroup_subsys_state *css);
	void (*css_reset)(struct cgroup_subsys_state *css);

	/* task migration callbacks */
	int (*can_attach)(struct cgroup_subsys_state *css,
			  struct cgroup_taskset *tset);
	void (*cancel_attach)(struct cgroup_subsys_state *css,
			      struct cgroup_taskset *tset);
	void (*attach)(struct cgroup_subsys_state *css,
		       struct cgroup_taskset *tset);
	void (*fork)(struct task_struct *task);
	void (*exit)(struct cgroup_subsys_state *css,
		     struct cgroup_subsys_state *old_css,
		     struct task_struct *task);
	void (*bind)(struct cgroup_subsys_state *root_css);

	int disabled;
	int early_init;

	/*
	 * If %false, this subsystem is properly hierarchical -
	 * configuration, resource accounting and restriction on a parent
	 * cgroup cover those of its children.  If %true, hierarchy support
	 * is broken in some ways - some subsystems ignore hierarchy
	 * completely while others are only implemented half-way.
	 *
	 * It's now disallowed to create nested cgroups if the subsystem is
	 * broken and cgroup core will emit a warning message on such
	 * cases.  Eventually, all subsystems will be made properly
	 * hierarchical and this will go away.
	 */
	bool broken_hierarchy;
	bool warned_broken_hierarchy;

	/* the following two fields are initialized automatically during boot */
	int id;
#define MAX_CGROUP_TYPE_NAMELEN 32
	const char *name;

	/* link to parent, protected by cgroup_lock() */
	struct cgroup_root *root;

	/* idr for css->id */
	struct idr css_idr;

	/*
	 * List of cftypes.  Each entry is the first entry of an array
	 * terminated by zero length name.
	 */
	struct list_head cfts;

	/*
	 * Base cftypes which are automatically registered.  The two can
	 * point to the same array.
	 */
	struct cftype *dfl_cftypes;	/* for the default hierarchy */
	struct cftype *legacy_cftypes;	/* for the legacy hierarchies */

	/*
	 * A subsystem may depend on other subsystems.  When such subsystem
	 * is enabled on a cgroup, the depended-upon subsystems are enabled
	 * together if available.  Subsystems enabled due to dependency are
	 * not visible to userland until explicitly enabled.  The following
	 * specifies the mask of subsystems that this one depends on.
	 */
	unsigned int depends_on;
};

/* declare the per-controller cgroup_subsys instances */
#define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys;
#include <linux/cgroup_subsys.h>
#undef SUBSYS

/**
 * task_css_set_check - obtain a task's css_set with extra access conditions
 * @task: the task to obtain css_set for
 * @__c: extra condition expression to be passed to rcu_dereference_check()
 *
 * A task's css_set is RCU protected, initialized and exited while holding
 * task_lock(), and can only be modified while holding both cgroup_mutex
 * and task_lock() while the task is alive.  This macro verifies that the
 * caller is inside proper critical section and returns @task's css_set.
 *
 * The caller can also specify additional allowed conditions via @__c, such
 * as locks used during the cgroup_subsys::attach() methods.
 */
#ifdef CONFIG_PROVE_RCU
extern struct mutex cgroup_mutex;
extern struct rw_semaphore css_set_rwsem;
#define task_css_set_check(task, __c)					\
	rcu_dereference_check((task)->cgroups,				\
		lockdep_is_held(&cgroup_mutex) ||			\
		lockdep_is_held(&css_set_rwsem) ||			\
		((task)->flags & PF_EXITING) || (__c))
#else
#define task_css_set_check(task, __c)					\
	rcu_dereference((task)->cgroups)
#endif

/**
 * task_css_check - obtain css for (task, subsys) w/ extra access conds
 * @task: the target task
 * @subsys_id: the target subsystem ID
 * @__c: extra condition expression to be passed to rcu_dereference_check()
 *
 * Return the cgroup_subsys_state for the (@task, @subsys_id) pair.  The
 * synchronization rules are the same as task_css_set_check().
 */
#define task_css_check(task, subsys_id, __c)				\
	task_css_set_check((task), (__c))->subsys[(subsys_id)]

/**
 * task_css_set - obtain a task's css_set
 * @task: the task to obtain css_set for
 *
 * See task_css_set_check().
 */
static inline struct css_set *task_css_set(struct task_struct *task)
{
	return task_css_set_check(task, false);
}

/**
 * task_css - obtain css for (task, subsys)
 * @task: the target task
 * @subsys_id: the target subsystem ID
 *
 * See task_css_check().
 */
static inline struct cgroup_subsys_state *task_css(struct task_struct *task,
						   int subsys_id)
{
	return task_css_check(task, subsys_id, false);
}

/**
 * task_css_is_root - test whether a task belongs to the root css
 * @task: the target task
 * @subsys_id: the target subsystem ID
 *
 * Test whether @task belongs to the root css on the specified subsystem.
 * May be invoked in any context.
 */
static inline bool task_css_is_root(struct task_struct *task, int subsys_id)
{
	return task_css_check(task, subsys_id, true) ==
		init_css_set.subsys[subsys_id];
}

/* the cgroup of @task for @subsys_id; same RCU rules as task_css() */
static inline struct cgroup *task_cgroup(struct task_struct *task,
					 int subsys_id)
{
	return task_css(task, subsys_id)->cgroup;
}

struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
					   struct cgroup_subsys_state *parent);

struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss);

/**
 * css_for_each_child - iterate through children of a css
 * @pos: the css * to use as the loop cursor
 * @parent: css whose children to walk
 *
 * Walk @parent's children.  Must be called under rcu_read_lock().
 *
 * If a subsystem synchronizes ->css_online() and the start of iteration, a
 * css which finished ->css_online() is guaranteed to be visible in the
 * future iterations and will stay visible until the last reference is put.
 * A css which hasn't finished ->css_online() or already finished
 * ->css_offline() may show up during traversal.  It's each subsystem's
 * responsibility to synchronize against on/offlining.
 *
 * It is allowed to temporarily drop RCU read lock during iteration.  The
 * caller is responsible for ensuring that @pos remains accessible until
 * the start of the next iteration by, for example, bumping the css refcnt.
 */
#define css_for_each_child(pos, parent)					\
	for ((pos) = css_next_child(NULL, (parent)); (pos);		\
	     (pos) = css_next_child((pos), (parent)))

struct cgroup_subsys_state *
css_next_descendant_pre(struct cgroup_subsys_state *pos,
			struct cgroup_subsys_state *css);

struct cgroup_subsys_state *
css_rightmost_descendant(struct cgroup_subsys_state *pos);

/**
 * css_for_each_descendant_pre - pre-order walk of a css's descendants
 * @pos: the css * to use as the loop cursor
 * @root: css whose descendants to walk
 *
 * Walk @root's descendants.  @root is included in the iteration and the
 * first node to be visited.  Must be called under rcu_read_lock().
 *
 * If a subsystem synchronizes ->css_online() and the start of iteration, a
 * css which finished ->css_online() is guaranteed to be visible in the
 * future iterations and will stay visible until the last reference is put.
 * A css which hasn't finished ->css_online() or already finished
 * ->css_offline() may show up during traversal.  It's each subsystem's
 * responsibility to synchronize against on/offlining.
 *
 * For example, the following guarantees that a descendant can't escape
 * state updates of its ancestors.
 *
 * my_online(@css)
 * {
 *	Lock @css's parent and @css;
 *	Inherit state from the parent;
 *	Unlock both.
 * }
 *
 * my_update_state(@css)
 * {
 *	css_for_each_descendant_pre(@pos, @css) {
 *		Lock @pos;
 *		if (@pos == @css)
 *			Update @css's state;
 *		else
 *			Verify @pos is alive and inherit state from its parent;
 *		Unlock @pos;
 *	}
 * }
 *
 * As long as the inheriting step, including checking the parent state, is
 * enclosed inside @pos locking, double-locking the parent isn't necessary
 * while inheriting.
 * The state update to the parent is guaranteed to be
 * visible by walking order and, as long as inheriting operations to the
 * same @pos are atomic to each other, multiple updates racing each other
 * still result in the correct state.  It's guaranteed that at least one
 * inheritance happens for any css after the latest update to its parent.
 *
 * If checking parent's state requires locking the parent, each inheriting
 * iteration should lock and unlock both @pos->parent and @pos.
 *
 * Alternatively, a subsystem may choose to use a single global lock to
 * synchronize ->css_online() and ->css_offline() against tree-walking
 * operations.
 *
 * It is allowed to temporarily drop RCU read lock during iteration.  The
 * caller is responsible for ensuring that @pos remains accessible until
 * the start of the next iteration by, for example, bumping the css refcnt.
 */
#define css_for_each_descendant_pre(pos, css)				\
	for ((pos) = css_next_descendant_pre(NULL, (css)); (pos);	\
	     (pos) = css_next_descendant_pre((pos), (css)))

struct cgroup_subsys_state *
css_next_descendant_post(struct cgroup_subsys_state *pos,
			 struct cgroup_subsys_state *css);

/**
 * css_for_each_descendant_post - post-order walk of a css's descendants
 * @pos: the css * to use as the loop cursor
 * @css: css whose descendants to walk
 *
 * Similar to css_for_each_descendant_pre() but performs post-order
 * traversal instead.  @root is included in the iteration and the last
 * node to be visited.
 *
 * If a subsystem synchronizes ->css_online() and the start of iteration, a
 * css which finished ->css_online() is guaranteed to be visible in the
 * future iterations and will stay visible until the last reference is put.
 * A css which hasn't finished ->css_online() or already finished
 * ->css_offline() may show up during traversal.
 * It's each subsystem's
 * responsibility to synchronize against on/offlining.
 *
 * Note that the walk visibility guarantee example described in pre-order
 * walk doesn't apply the same to post-order walks.
 */
#define css_for_each_descendant_post(pos, css)				\
	for ((pos) = css_next_descendant_post(NULL, (css)); (pos);	\
	     (pos) = css_next_descendant_post((pos), (css)))

bool css_has_online_children(struct cgroup_subsys_state *css);

/* A css_task_iter should be treated as an opaque object */
struct css_task_iter {
	struct cgroup_subsys		*ss;

	struct list_head		*cset_pos;
	struct list_head		*cset_head;

	struct list_head		*task_pos;
	struct list_head		*tasks_head;
	struct list_head		*mg_tasks_head;
};

void css_task_iter_start(struct cgroup_subsys_state *css,
			 struct css_task_iter *it);
struct task_struct *css_task_iter_next(struct css_task_iter *it);
void css_task_iter_end(struct css_task_iter *it);

int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);

struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
						       struct cgroup_subsys *ss);

#else /* !CONFIG_CGROUPS */

/* no-op stubs so callers need not be conditional on CONFIG_CGROUPS */
static inline int cgroup_init_early(void) { return 0; }
static inline int cgroup_init(void) { return 0; }
static inline void cgroup_fork(struct task_struct *p) {}
static inline void cgroup_post_fork(struct task_struct *p) {}
static inline void cgroup_exit(struct task_struct *p) {}

static inline int cgroupstats_build(struct cgroupstats *stats,
				    struct dentry *dentry)
{
	return -EINVAL;
}

/* No cgroups - nothing to do */
static inline int cgroup_attach_task_all(struct task_struct *from,
					 struct task_struct *t)
{
	return 0;
}

#endif /* !CONFIG_CGROUPS */

#endif /* _LINUX_CGROUP_H */