#ifndef _LINUX_CGROUP_H
#define _LINUX_CGROUP_H
/*
 * cgroup interface
 *
 * Copyright (C) 2003 BULL SA
 * Copyright (C) 2004-2006 Silicon Graphics, Inc.
 *
 */

#include <linux/sched.h>
#include <linux/cpumask.h>
#include <linux/nodemask.h>
#include <linux/rcupdate.h>
#include <linux/rculist.h>
#include <linux/cgroupstats.h>
#include <linux/rwsem.h>
#include <linux/idr.h>
#include <linux/workqueue.h>
#include <linux/fs.h>
#include <linux/percpu-refcount.h>
#include <linux/seq_file.h>
#include <linux/kernfs.h>
#include <linux/wait.h>

#ifdef CONFIG_CGROUPS

struct cgroup_root;
struct cgroup_subsys;
struct cgroup;

extern int cgroup_init_early(void);
extern int cgroup_init(void);
extern void cgroup_fork(struct task_struct *p);
extern void cgroup_post_fork(struct task_struct *p);
extern void cgroup_exit(struct task_struct *p);
extern int cgroupstats_build(struct cgroupstats *stats,
				struct dentry *dentry);

extern int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
			    struct pid *pid, struct task_struct *tsk);

/*
 * Define the enumeration of all cgroup subsystems.  This is an x-macro:
 * cgroup_subsys.h expands SUBSYS() once per subsystem, so each one gets
 * an <name>_cgrp_id constant and CGROUP_SUBSYS_COUNT ends up being the
 * total number of subsystems.
 */
#define SUBSYS(_x) _x ## _cgrp_id,
enum cgroup_subsys_id {
#include <linux/cgroup_subsys.h>
	CGROUP_SUBSYS_COUNT,
};
#undef SUBSYS

/*
 * Per-subsystem/per-cgroup state maintained by the system.  This is the
 * fundamental structural building block that controllers deal with.
 *
 * Fields marked with "PI:" are public and immutable and may be accessed
 * directly without synchronization.
 */
struct cgroup_subsys_state {
	/* PI: the cgroup that this css is attached to */
	struct cgroup *cgroup;

	/* PI: the cgroup subsystem that this css is attached to */
	struct cgroup_subsys *ss;

	/* reference count - access via css_[try]get() and css_put() */
	struct percpu_ref refcnt;

	/* PI: the parent css */
	struct cgroup_subsys_state *parent;

	/* siblings list anchored at the parent's ->children */
	struct list_head sibling;
	struct list_head children;

	/*
	 * PI: Subsys-unique ID.  0 is unused and root is always 1.  The
	 * matching css can be looked up using css_from_id().
	 */
	int id;

	/* CSS_* flags, see the enum below */
	unsigned int flags;

	/*
	 * Monotonically increasing unique serial number which defines a
	 * uniform order among all csses.  It's guaranteed that all
	 * ->children lists are in the ascending order of ->serial_nr and
	 * used to allow interrupting and resuming iterations.
	 */
	u64 serial_nr;

	/* percpu_ref killing and RCU release */
	struct rcu_head rcu_head;
	struct work_struct destroy_work;
};

/* bits in struct cgroup_subsys_state flags field */
enum {
	CSS_NO_REF	= (1 << 0), /* no reference counting for this css */
	CSS_ONLINE	= (1 << 1), /* between ->css_online() and ->css_offline() */
	CSS_RELEASED	= (1 << 2), /* refcnt reached zero, released */
};

/**
 * css_get - obtain a reference on the specified css
 * @css: target css
 *
 * The caller must already have a reference.
 */
static inline void css_get(struct cgroup_subsys_state *css)
{
	/* csses marked CSS_NO_REF (e.g. the root's self css) skip refcounting */
	if (!(css->flags & CSS_NO_REF))
		percpu_ref_get(&css->refcnt);
}

/**
 * css_tryget - try to obtain a reference on the specified css
 * @css: target css
 *
 * Obtain a reference on @css unless it already has reached zero and is
 * being released.  This function doesn't care whether @css is on or
 * offline.
 * The caller naturally needs to ensure that @css is accessible
 * but doesn't have to be holding a reference on it - IOW, RCU protected
 * access is good enough for this function.  Returns %true if a reference
 * count was successfully obtained; %false otherwise.
 */
static inline bool css_tryget(struct cgroup_subsys_state *css)
{
	if (!(css->flags & CSS_NO_REF))
		return percpu_ref_tryget(&css->refcnt);
	/* unrefcounted csses always "succeed" */
	return true;
}

/**
 * css_tryget_online - try to obtain a reference on the specified css if online
 * @css: target css
 *
 * Obtain a reference on @css if it's online.  The caller naturally needs
 * to ensure that @css is accessible but doesn't have to be holding a
 * reference on it - IOW, RCU protected access is good enough for this
 * function.  Returns %true if a reference count was successfully obtained;
 * %false otherwise.
 */
static inline bool css_tryget_online(struct cgroup_subsys_state *css)
{
	/* tryget_live fails once the ref has been marked for killing */
	if (!(css->flags & CSS_NO_REF))
		return percpu_ref_tryget_live(&css->refcnt);
	return true;
}

/**
 * css_put - put a css reference
 * @css: target css
 *
 * Put a reference obtained via css_get() and css_tryget_online().
 */
static inline void css_put(struct cgroup_subsys_state *css)
{
	if (!(css->flags & CSS_NO_REF))
		percpu_ref_put(&css->refcnt);
}

/* bits in struct cgroup flags field */
enum {
	/* Control Group requires release notifications to userspace */
	CGRP_NOTIFY_ON_RELEASE,
	/*
	 * Clone the parent's configuration when creating a new child
	 * cpuset cgroup.  For historical reasons, this option can be
	 * specified at mount time and thus is implemented here.
	 */
	CGRP_CPUSET_CLONE_CHILDREN,
};

struct cgroup {
	/* self css with NULL ->ss, points back to this cgroup */
	struct cgroup_subsys_state self;

	unsigned long flags;		/* "unsigned long" so bitops work */

	/*
	 * idr allocated in-hierarchy ID.
	 *
	 * ID 0 is not used, the ID of the root cgroup is always 1, and a
	 * new cgroup will be assigned with a smallest available ID.
	 *
	 * Allocating/Removing ID must be protected by cgroup_mutex.
	 */
	int id;

	/*
	 * If this cgroup contains any tasks, it contributes one to
	 * populated_cnt.  All children with non-zero populated_cnt of
	 * their own contribute one.  The count is zero iff there's no task
	 * in this cgroup or its subtree.
	 */
	int populated_cnt;

	struct kernfs_node *kn;		/* cgroup kernfs entry */
	struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */

	/*
	 * The bitmask of subsystems enabled on the child cgroups.
	 * ->subtree_control is the one configured through
	 * "cgroup.subtree_control" while ->child_subsys_mask is the
	 * effective one which may have more subsystems enabled.
	 * Controller knobs are made available iff it's enabled in
	 * ->subtree_control.
	 */
	unsigned int subtree_control;
	unsigned int child_subsys_mask;

	/* Private pointers for each registered subsystem */
	struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT];

	struct cgroup_root *root;

	/*
	 * List of cgrp_cset_links pointing at css_sets with tasks in this
	 * cgroup.  Protected by css_set_lock.
	 */
	struct list_head cset_links;

	/*
	 * On the default hierarchy, a css_set for a cgroup with some
	 * subsys disabled will point to css's which are associated with
	 * the closest ancestor which has the subsys enabled.  The
	 * following lists all css_sets which point to this cgroup's css
	 * for the given subsystem.
	 */
	struct list_head e_csets[CGROUP_SUBSYS_COUNT];

	/*
	 * list of pidlists, up to two for each namespace (one for procs, one
	 * for tasks); created on demand.
	 */
	struct list_head pidlists;
	struct mutex pidlist_mutex;

	/* used to wait for offlining of csses */
	wait_queue_head_t offline_waitq;

	/* used to schedule release agent */
	struct work_struct release_agent_work;
};

#define MAX_CGROUP_ROOT_NAMELEN 64

/* cgroup_root->flags */
enum {
	CGRP_ROOT_SANE_BEHAVIOR	= (1 << 0), /* __DEVEL__sane_behavior specified */
	CGRP_ROOT_NOPREFIX	= (1 << 1), /* mounted subsystems have no named prefix */
	CGRP_ROOT_XATTR		= (1 << 2), /* supports extended attributes */
};

/*
 * A cgroup_root represents the root of a cgroup hierarchy, and may be
 * associated with a kernfs_root to form an active hierarchy.  This is
 * internal to cgroup core.  Don't access directly from controllers.
 */
struct cgroup_root {
	struct kernfs_root *kf_root;

	/* The bitmask of subsystems attached to this hierarchy */
	unsigned int subsys_mask;

	/* Unique id for this hierarchy. */
	int hierarchy_id;

	/* The root cgroup.  Root is destroyed on its release. */
	struct cgroup cgrp;

	/* Number of cgroups in the hierarchy, used only for /proc/cgroups */
	atomic_t nr_cgrps;

	/* A list running through the active hierarchies */
	struct list_head root_list;

	/* Hierarchy-specific flags */
	unsigned int flags;

	/* IDs for cgroups in this hierarchy */
	struct idr cgroup_idr;

	/* The path to use for release notifications. */
	char release_agent_path[PATH_MAX];

	/* The name for this hierarchy - may be empty */
	char name[MAX_CGROUP_ROOT_NAMELEN];
};

/*
 * A css_set is a structure holding pointers to a set of
 * cgroup_subsys_state objects.
 * This saves space in the task struct
 * object and speeds up fork()/exit(), since a single inc/dec and a
 * list_add()/del() can bump the reference count on the entire cgroup
 * set for a task.
 */

struct css_set {

	/* Reference count */
	atomic_t refcount;

	/*
	 * List running through all cgroup groups in the same hash
	 * slot. Protected by css_set_lock
	 */
	struct hlist_node hlist;

	/*
	 * Lists running through all tasks using this cgroup group.
	 * mg_tasks lists tasks which belong to this cset but are in the
	 * process of being migrated out or in.  Protected by
	 * css_set_rwsem, but, during migration, once tasks are moved to
	 * mg_tasks, it can be read safely while holding cgroup_mutex.
	 */
	struct list_head tasks;
	struct list_head mg_tasks;

	/*
	 * List of cgrp_cset_links pointing at cgroups referenced from this
	 * css_set.  Protected by css_set_lock.
	 */
	struct list_head cgrp_links;

	/* the default cgroup associated with this css_set */
	struct cgroup *dfl_cgrp;

	/*
	 * Set of subsystem states, one for each subsystem.  This array is
	 * immutable after creation apart from the init_css_set during
	 * subsystem registration (at boot time).
	 */
	struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];

	/*
	 * List of csets participating in the on-going migration either as
	 * source or destination.  Protected by cgroup_mutex.
	 */
	struct list_head mg_preload_node;
	struct list_head mg_node;

	/*
	 * If this cset is acting as the source of migration the following
	 * two fields are set.  mg_src_cgrp is the source cgroup of the
	 * on-going migration and mg_dst_cset is the destination cset the
	 * target tasks on this cset should be migrated to.  Protected by
	 * cgroup_mutex.
	 */
	struct cgroup *mg_src_cgrp;
	struct css_set *mg_dst_cset;

	/*
	 * On the default hierarchy, ->subsys[ssid] may point to a css
	 * attached to an ancestor instead of the cgroup this css_set is
	 * associated with.  The following node is anchored at
	 * ->subsys[ssid]->cgroup->e_csets[ssid] and provides a way to
	 * iterate through all css's attached to a given cgroup.
	 */
	struct list_head e_cset_node[CGROUP_SUBSYS_COUNT];

	/* For RCU-protected deletion */
	struct rcu_head rcu_head;
};

/*
 * struct cftype: handler definitions for cgroup control files
 *
 * When reading/writing to a file:
 *	- the cgroup to use is file->f_dentry->d_parent->d_fsdata
 *	- the 'cftype' of the file is file->f_dentry->d_fsdata
 */

/* cftype->flags */
enum {
	CFTYPE_ONLY_ON_ROOT	= (1 << 0),	/* only create on root cgrp */
	CFTYPE_NOT_ON_ROOT	= (1 << 1),	/* don't create on root cgrp */
	/* (1 << 2) is historically unused here */
	CFTYPE_NO_PREFIX	= (1 << 3),	/* (DON'T USE FOR NEW FILES) no subsys prefix */

	/* internal flags, do not use outside cgroup core proper */
	__CFTYPE_ONLY_ON_DFL	= (1 << 16),	/* only on default hierarchy */
	__CFTYPE_NOT_ON_DFL	= (1 << 17),	/* not on default hierarchy */
};

#define MAX_CFTYPE_NAME		64

struct cftype {
	/*
	 * By convention, the name should begin with the name of the
	 * subsystem, followed by a period.  Zero length string indicates
	 * end of cftype array.
	 */
	char name[MAX_CFTYPE_NAME];
	int private;
	/*
	 * If not 0, file mode is set to this value, otherwise it will
	 * be figured out automatically
	 */
	umode_t mode;

	/*
	 * The maximum length of string, excluding trailing nul, that can
	 * be passed to write.  If < PAGE_SIZE-1, PAGE_SIZE-1 is assumed.
	 */
	size_t max_write_len;

	/* CFTYPE_* flags */
	unsigned int flags;

	/*
	 * Fields used for internal bookkeeping.
	 * Initialized automatically during registration.
	 */
	struct cgroup_subsys *ss;	/* NULL for cgroup core files */
	struct list_head node;		/* anchored at ss->cfts */
	struct kernfs_ops *kf_ops;

	/*
	 * read_u64() is a shortcut for the common case of returning a
	 * single integer. Use it in place of read()
	 */
	u64 (*read_u64)(struct cgroup_subsys_state *css, struct cftype *cft);
	/*
	 * read_s64() is a signed version of read_u64()
	 */
	s64 (*read_s64)(struct cgroup_subsys_state *css, struct cftype *cft);

	/* generic seq_file read interface */
	int (*seq_show)(struct seq_file *sf, void *v);

	/* optional ops, implement all or none */
	void *(*seq_start)(struct seq_file *sf, loff_t *ppos);
	void *(*seq_next)(struct seq_file *sf, void *v, loff_t *ppos);
	void (*seq_stop)(struct seq_file *sf, void *v);

	/*
	 * write_u64() is a shortcut for the common case of accepting
	 * a single integer (as parsed by simple_strtoull) from
	 * userspace. Use in place of write(); return 0 or error.
	 */
	int (*write_u64)(struct cgroup_subsys_state *css, struct cftype *cft,
			 u64 val);
	/*
	 * write_s64() is a signed version of write_u64()
	 */
	int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft,
			 s64 val);

	/*
	 * write() is the generic write callback which maps directly to
	 * kernfs write operation and overrides all other operations.
	 * Maximum write size is determined by ->max_write_len.  Use
	 * of_css/cft() to access the associated css and cft.
	 */
	ssize_t (*write)(struct kernfs_open_file *of,
			 char *buf, size_t nbytes, loff_t off);

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	struct lock_class_key	lockdep_key;
#endif
};

extern struct cgroup_root cgrp_dfl_root;
extern struct css_set init_css_set;

/**
 * cgroup_on_dfl - test whether a cgroup is on the default hierarchy
 * @cgrp: the cgroup of interest
 *
 * The default hierarchy is the v2 interface of cgroup and this function
 * can be used to test whether a cgroup is on the default hierarchy for
 * cases where a subsystem should behave differently depending on the
 * interface version.
 *
 * The set of behaviors which change on the default hierarchy are still
 * being determined and the mount option is prefixed with __DEVEL__.
 *
 * List of changed behaviors:
 *
 * - Mount options "noprefix", "xattr", "clone_children", "release_agent"
 *   and "name" are disallowed.
 *
 * - When mounting an existing superblock, mount options should match.
 *
 * - Remount is disallowed.
 *
 * - rename(2) is disallowed.
 *
 * - "tasks" is removed.  Everything should be at process granularity.  Use
 *   "cgroup.procs" instead.
 *
 * - "cgroup.procs" is not sorted.  pids will be unique unless they got
 *   recycled in between reads.
 *
 * - "release_agent" and "notify_on_release" are removed.  Replacement
 *   notification mechanism will be implemented.
 *
 * - "cgroup.clone_children" is removed.
 *
 * - "cgroup.subtree_populated" is available.  Its value is 0 if the cgroup
 *   and its descendants contain no task; otherwise, 1.  The file also
 *   generates kernfs notification which can be monitored through poll and
 *   [di]notify when the value of the file changes.
 *
 * - cpuset: tasks will be kept in empty cpusets when hotplug happens and
 *   take masks of ancestors with non-empty cpus/mems, instead of being
 *   moved to an ancestor.
 *
 * - cpuset: a task can be moved into an empty cpuset, and again it takes
 *   masks of ancestors.
 *
 * - memcg: use_hierarchy is on by default and the cgroup file for the flag
 *   is not created.
 *
 * - blkcg: blk-throttle becomes properly hierarchical.
 *
 * - debug: disallowed on the default hierarchy.
 */
static inline bool cgroup_on_dfl(const struct cgroup *cgrp)
{
	return cgrp->root == &cgrp_dfl_root;
}

/* no synchronization, the result can only be used as a hint */
static inline bool cgroup_has_tasks(struct cgroup *cgrp)
{
	return !list_empty(&cgrp->cset_links);
}

/* returns ino associated with a cgroup */
static inline ino_t cgroup_ino(struct cgroup *cgrp)
{
	return cgrp->kn->ino;
}

/* cft/css accessors for cftype->write() operation */
static inline struct cftype *of_cft(struct kernfs_open_file *of)
{
	return of->kn->priv;
}

struct cgroup_subsys_state *of_css(struct kernfs_open_file *of);

/* cft/css accessors for cftype->seq_*() operations */
static inline struct cftype *seq_cft(struct seq_file *seq)
{
	return of_cft(seq->private);
}

static inline struct cgroup_subsys_state *seq_css(struct seq_file *seq)
{
	return of_css(seq->private);
}

/*
 * Name / path handling functions.  All are thin wrappers around the kernfs
 * counterparts and can be called under any context.
 */

static inline int cgroup_name(struct cgroup *cgrp, char *buf, size_t buflen)
{
	return kernfs_name(cgrp->kn, buf, buflen);
}

static inline char * __must_check cgroup_path(struct cgroup *cgrp, char *buf,
					      size_t buflen)
{
	return kernfs_path(cgrp->kn, buf, buflen);
}

static inline void pr_cont_cgroup_name(struct cgroup *cgrp)
{
	pr_cont_kernfs_name(cgrp->kn);
}

static inline void pr_cont_cgroup_path(struct cgroup *cgrp)
{
	pr_cont_kernfs_path(cgrp->kn);
}

char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);

/* cftype registration - dfl for the default hierarchy, legacy for v1 */
int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
int cgroup_rm_cftypes(struct cftype *cfts);

bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor);

/*
 * Control Group taskset, used to pass around set of tasks to cgroup_subsys
 * methods.
 */
struct cgroup_taskset;
struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset);
struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset);

/**
 * cgroup_taskset_for_each - iterate cgroup_taskset
 * @task: the loop cursor
 * @tset: taskset to iterate
 */
#define cgroup_taskset_for_each(task, tset)				\
	for ((task) = cgroup_taskset_first((tset)); (task);		\
	     (task) = cgroup_taskset_next((tset)))

/*
 * Control Group subsystem type.
 * See Documentation/cgroups/cgroups.txt for details
 */

struct cgroup_subsys {
	/* css lifetime callbacks, invoked by cgroup core */
	struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css);
	int (*css_online)(struct cgroup_subsys_state *css);
	void (*css_offline)(struct cgroup_subsys_state *css);
	void (*css_free)(struct cgroup_subsys_state *css);
	void (*css_reset)(struct cgroup_subsys_state *css);

	int (*can_attach)(struct cgroup_subsys_state *css,
			  struct cgroup_taskset *tset);
	void (*cancel_attach)(struct cgroup_subsys_state *css,
			      struct cgroup_taskset *tset);
	void (*attach)(struct cgroup_subsys_state *css,
		       struct cgroup_taskset *tset);
	void (*fork)(struct task_struct *task);
	void (*exit)(struct cgroup_subsys_state *css,
		     struct cgroup_subsys_state *old_css,
		     struct task_struct *task);
	void (*bind)(struct cgroup_subsys_state *root_css);

	int disabled;
	int early_init;

	/*
	 * If %false, this subsystem is properly hierarchical -
	 * configuration, resource accounting and restriction on a parent
	 * cgroup cover those of its children.  If %true, hierarchy support
	 * is broken in some ways - some subsystems ignore hierarchy
	 * completely while others are only implemented half-way.
	 *
	 * It's now disallowed to create nested cgroups if the subsystem is
	 * broken and cgroup core will emit a warning message on such
	 * cases.  Eventually, all subsystems will be made properly
	 * hierarchical and this will go away.
	 */
	bool broken_hierarchy;
	bool warned_broken_hierarchy;

	/* the following two fields are initialized automatically during boot */
	int id;
#define MAX_CGROUP_TYPE_NAMELEN 32
	const char *name;

	/* link to parent, protected by cgroup_lock() */
	struct cgroup_root *root;

	/* idr for css->id */
	struct idr css_idr;

	/*
	 * List of cftypes.  Each entry is the first entry of an array
	 * terminated by zero length name.
	 */
	struct list_head cfts;

	/*
	 * Base cftypes which are automatically registered.  The two can
	 * point to the same array.
	 */
	struct cftype *dfl_cftypes;	/* for the default hierarchy */
	struct cftype *legacy_cftypes;	/* for the legacy hierarchies */

	/*
	 * A subsystem may depend on other subsystems.  When such subsystem
	 * is enabled on a cgroup, the depended-upon subsystems are enabled
	 * together if available.  Subsystems enabled due to dependency are
	 * not visible to userland until explicitly enabled.  The following
	 * specifies the mask of subsystems that this one depends on.
	 */
	unsigned int depends_on;
};

/* declare <name>_cgrp_subsys for every built-in subsystem via the x-macro */
#define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys;
#include <linux/cgroup_subsys.h>
#undef SUBSYS

/**
 * task_css_set_check - obtain a task's css_set with extra access conditions
 * @task: the task to obtain css_set for
 * @__c: extra condition expression to be passed to rcu_dereference_check()
 *
 * A task's css_set is RCU protected, initialized and exited while holding
 * task_lock(), and can only be modified while holding both cgroup_mutex
 * and task_lock() while the task is alive.  This macro verifies that the
 * caller is inside proper critical section and returns @task's css_set.
 *
 * The caller can also specify additional allowed conditions via @__c, such
 * as locks used during the cgroup_subsys::attach() methods.
 */
#ifdef CONFIG_PROVE_RCU
extern struct mutex cgroup_mutex;
extern struct rw_semaphore css_set_rwsem;
#define task_css_set_check(task, __c)					\
	rcu_dereference_check((task)->cgroups,				\
		lockdep_is_held(&cgroup_mutex) ||			\
		lockdep_is_held(&css_set_rwsem) ||			\
		((task)->flags & PF_EXITING) || (__c))
#else
#define task_css_set_check(task, __c)					\
	rcu_dereference((task)->cgroups)
#endif

/**
 * task_css_check - obtain css for (task, subsys) w/ extra access conds
 * @task: the target task
 * @subsys_id: the target subsystem ID
 * @__c: extra condition expression to be passed to rcu_dereference_check()
 *
 * Return the cgroup_subsys_state for the (@task, @subsys_id) pair.  The
 * synchronization rules are the same as task_css_set_check().
 */
#define task_css_check(task, subsys_id, __c)				\
	task_css_set_check((task), (__c))->subsys[(subsys_id)]

/**
 * task_css_set - obtain a task's css_set
 * @task: the task to obtain css_set for
 *
 * See task_css_set_check().
 */
static inline struct css_set *task_css_set(struct task_struct *task)
{
	return task_css_set_check(task, false);
}

/**
 * task_css - obtain css for (task, subsys)
 * @task: the target task
 * @subsys_id: the target subsystem ID
 *
 * See task_css_check().
 */
static inline struct cgroup_subsys_state *task_css(struct task_struct *task,
						   int subsys_id)
{
	return task_css_check(task, subsys_id, false);
}

/**
 * task_css_is_root - test whether a task belongs to the root css
 * @task: the target task
 * @subsys_id: the target subsystem ID
 *
 * Test whether @task belongs to the root css on the specified subsystem.
 * May be invoked in any context.
 */
static inline bool task_css_is_root(struct task_struct *task, int subsys_id)
{
	/* passing true as @__c suppresses the lockdep check - safe anywhere */
	return task_css_check(task, subsys_id, true) ==
		init_css_set.subsys[subsys_id];
}

static inline struct cgroup *task_cgroup(struct task_struct *task,
					 int subsys_id)
{
	return task_css(task, subsys_id)->cgroup;
}

struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
					   struct cgroup_subsys_state *parent);

struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss);

/**
 * css_for_each_child - iterate through children of a css
 * @pos: the css * to use as the loop cursor
 * @parent: css whose children to walk
 *
 * Walk @parent's children.  Must be called under rcu_read_lock().
 *
 * If a subsystem synchronizes ->css_online() and the start of iteration, a
 * css which finished ->css_online() is guaranteed to be visible in the
 * future iterations and will stay visible until the last reference is put.
 * A css which hasn't finished ->css_online() or already finished
 * ->css_offline() may show up during traversal.  It's each subsystem's
 * responsibility to synchronize against on/offlining.
 *
 * It is allowed to temporarily drop RCU read lock during iteration.  The
 * caller is responsible for ensuring that @pos remains accessible until
 * the start of the next iteration by, for example, bumping the css refcnt.
 */
#define css_for_each_child(pos, parent)					\
	for ((pos) = css_next_child(NULL, (parent)); (pos);		\
	     (pos) = css_next_child((pos), (parent)))

struct cgroup_subsys_state *
css_next_descendant_pre(struct cgroup_subsys_state *pos,
			struct cgroup_subsys_state *css);

struct cgroup_subsys_state *
css_rightmost_descendant(struct cgroup_subsys_state *pos);

/**
 * css_for_each_descendant_pre - pre-order walk of a css's descendants
 * @pos: the css * to use as the loop cursor
 * @root: css whose descendants to walk
 *
 * Walk @root's descendants.  @root is included in the iteration and the
 * first node to be visited.  Must be called under rcu_read_lock().
 *
 * If a subsystem synchronizes ->css_online() and the start of iteration, a
 * css which finished ->css_online() is guaranteed to be visible in the
 * future iterations and will stay visible until the last reference is put.
 * A css which hasn't finished ->css_online() or already finished
 * ->css_offline() may show up during traversal.  It's each subsystem's
 * responsibility to synchronize against on/offlining.
 *
 * For example, the following guarantees that a descendant can't escape
 * state updates of its ancestors.
 *
 * my_online(@css)
 * {
 *	Lock @css's parent and @css;
 *	Inherit state from the parent;
 *	Unlock both.
 * }
 *
 * my_update_state(@css)
 * {
 *	css_for_each_descendant_pre(@pos, @css) {
 *		Lock @pos;
 *		if (@pos == @css)
 *			Update @css's state;
 *		else
 *			Verify @pos is alive and inherit state from its parent;
 *		Unlock @pos;
 *	}
 * }
 *
 * As long as the inheriting step, including checking the parent state, is
 * enclosed inside @pos locking, double-locking the parent isn't necessary
 * while inheriting.
 * The state update to the parent is guaranteed to be
 * visible by walking order and, as long as inheriting operations to the
 * same @pos are atomic to each other, multiple updates racing each other
 * still result in the correct state.  It's guaranteed that at least one
 * inheritance happens for any css after the latest update to its parent.
 *
 * If checking parent's state requires locking the parent, each inheriting
 * iteration should lock and unlock both @pos->parent and @pos.
 *
 * Alternatively, a subsystem may choose to use a single global lock to
 * synchronize ->css_online() and ->css_offline() against tree-walking
 * operations.
 *
 * It is allowed to temporarily drop RCU read lock during iteration.  The
 * caller is responsible for ensuring that @pos remains accessible until
 * the start of the next iteration by, for example, bumping the css refcnt.
 */
#define css_for_each_descendant_pre(pos, css)				\
	for ((pos) = css_next_descendant_pre(NULL, (css)); (pos);	\
	     (pos) = css_next_descendant_pre((pos), (css)))

struct cgroup_subsys_state *
css_next_descendant_post(struct cgroup_subsys_state *pos,
			 struct cgroup_subsys_state *css);

/**
 * css_for_each_descendant_post - post-order walk of a css's descendants
 * @pos: the css * to use as the loop cursor
 * @css: css whose descendants to walk
 *
 * Similar to css_for_each_descendant_pre() but performs post-order
 * traversal instead.  @root is included in the iteration and the last
 * node to be visited.
 *
 * If a subsystem synchronizes ->css_online() and the start of iteration, a
 * css which finished ->css_online() is guaranteed to be visible in the
 * future iterations and will stay visible until the last reference is put.
 * A css which hasn't finished ->css_online() or already finished
 * ->css_offline() may show up during traversal.
 * It's each subsystem's
 * responsibility to synchronize against on/offlining.
 *
 * Note that the walk visibility guarantee example described in pre-order
 * walk doesn't apply the same to post-order walks.
 */
#define css_for_each_descendant_post(pos, css)				\
	for ((pos) = css_next_descendant_post(NULL, (css)); (pos);	\
	     (pos) = css_next_descendant_post((pos), (css)))

bool css_has_online_children(struct cgroup_subsys_state *css);

/* A css_task_iter should be treated as an opaque object */
struct css_task_iter {
	struct cgroup_subsys		*ss;

	/* current position and end of the css_set walk */
	struct list_head		*cset_pos;
	struct list_head		*cset_head;

	/* current position within the current cset's task lists */
	struct list_head		*task_pos;
	struct list_head		*tasks_head;
	struct list_head		*mg_tasks_head;
};

void css_task_iter_start(struct cgroup_subsys_state *css,
			 struct css_task_iter *it);
struct task_struct *css_task_iter_next(struct css_task_iter *it);
void css_task_iter_end(struct css_task_iter *it);

int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);

struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
						       struct cgroup_subsys *ss);

#else /* !CONFIG_CGROUPS */

/* no-op stubs so callers build unchanged when cgroups are configured out */
static inline int cgroup_init_early(void) { return 0; }
static inline int cgroup_init(void) { return 0; }
static inline void cgroup_fork(struct task_struct *p) {}
static inline void cgroup_post_fork(struct task_struct *p) {}
static inline void cgroup_exit(struct task_struct *p) {}

static inline int cgroupstats_build(struct cgroupstats *stats,
				    struct dentry *dentry)
{
	return -EINVAL;
}

/* No cgroups - nothing to do */
static inline int cgroup_attach_task_all(struct task_struct *from,
					 struct task_struct *t)
{
	return 0;
}

#endif /* !CONFIG_CGROUPS */

#endif /* _LINUX_CGROUP_H */