#ifndef _LINUX_CGROUP_H
#define _LINUX_CGROUP_H
/*
 * cgroup interface
 *
 * Copyright (C) 2003 BULL SA
 * Copyright (C) 2004-2006 Silicon Graphics, Inc.
 *
 */

#include <linux/sched.h>
#include <linux/cpumask.h>
#include <linux/nodemask.h>
#include <linux/rcupdate.h>
#include <linux/rculist.h>
#include <linux/cgroupstats.h>
#include <linux/rwsem.h>
#include <linux/idr.h>
#include <linux/workqueue.h>
#include <linux/fs.h>
#include <linux/percpu-refcount.h>
#include <linux/seq_file.h>
#include <linux/kernfs.h>
#include <linux/wait.h>

#ifdef CONFIG_CGROUPS

struct cgroup_root;
struct cgroup_subsys;
struct cgroup;

extern int cgroup_init_early(void);
extern int cgroup_init(void);
extern void cgroup_fork(struct task_struct *p);
extern void cgroup_post_fork(struct task_struct *p);
extern void cgroup_exit(struct task_struct *p);
extern int cgroupstats_build(struct cgroupstats *stats,
			     struct dentry *dentry);

extern int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
			    struct pid *pid, struct task_struct *tsk);

/* define the enumeration of all cgroup subsystems */
#define SUBSYS(_x) _x ## _cgrp_id,
enum cgroup_subsys_id {
#include <linux/cgroup_subsys.h>
	CGROUP_SUBSYS_COUNT,
};
#undef SUBSYS

/*
 * Per-subsystem/per-cgroup state maintained by the system.  This is the
 * fundamental structural building block that controllers deal with.
 *
 * Fields marked with "PI:" are public and immutable and may be accessed
 * directly without synchronization.
 */
struct cgroup_subsys_state {
	/* PI: the cgroup that this css is attached to */
	struct cgroup *cgroup;

	/* PI: the cgroup subsystem that this css is attached to */
	struct cgroup_subsys *ss;

	/* reference count - access via css_[try]get() and css_put() */
	struct percpu_ref refcnt;

	/* PI: the parent css */
	struct cgroup_subsys_state *parent;

	/* siblings list anchored at the parent's ->children */
	struct list_head sibling;
	struct list_head children;

	/*
	 * PI: Subsys-unique ID.  0 is unused and root is always 1.  The
	 * matching css can be looked up using css_from_id().
	 */
	int id;

	/* CSS_* flags, see the enum below */
	unsigned int flags;

	/*
	 * Monotonically increasing unique serial number which defines a
	 * uniform order among all csses.  It's guaranteed that all
	 * ->children lists are in the ascending order of ->serial_nr and
	 * used to allow interrupting and resuming iterations.
	 */
	u64 serial_nr;

	/* percpu_ref killing and RCU release */
	struct rcu_head rcu_head;
	struct work_struct destroy_work;
};

/* bits in struct cgroup_subsys_state flags field */
enum {
	CSS_NO_REF	= (1 << 0), /* no reference counting for this css */
	CSS_ONLINE	= (1 << 1), /* between ->css_online() and ->css_offline() */
	CSS_RELEASED	= (1 << 2), /* refcnt reached zero, released */
};

/**
 * css_get - obtain a reference on the specified css
 * @css: target css
 *
 * The caller must already have a reference.
 */
static inline void css_get(struct cgroup_subsys_state *css)
{
	if (!(css->flags & CSS_NO_REF))
		percpu_ref_get(&css->refcnt);
}

/**
 * css_get_many - obtain references on the specified css
 * @css: target css
 * @n: number of references to get
 *
 * The caller must already have a reference.
 */
static inline void css_get_many(struct cgroup_subsys_state *css, unsigned int n)
{
	if (!(css->flags & CSS_NO_REF))
		percpu_ref_get_many(&css->refcnt, n);
}

/**
 * css_tryget - try to obtain a reference on the specified css
 * @css: target css
 *
 * Obtain a reference on @css unless it already has reached zero and is
 * being released.  This function doesn't care whether @css is on or
 * offline.  The caller naturally needs to ensure that @css is accessible
 * but doesn't have to be holding a reference on it - IOW, RCU protected
 * access is good enough for this function.  Returns %true if a reference
 * count was successfully obtained; %false otherwise.
 */
static inline bool css_tryget(struct cgroup_subsys_state *css)
{
	if (!(css->flags & CSS_NO_REF))
		return percpu_ref_tryget(&css->refcnt);
	return true;
}

/**
 * css_tryget_online - try to obtain a reference on the specified css if online
 * @css: target css
 *
 * Obtain a reference on @css if it's online.  The caller naturally needs
 * to ensure that @css is accessible but doesn't have to be holding a
 * reference on it - IOW, RCU protected access is good enough for this
 * function.  Returns %true if a reference count was successfully obtained;
 * %false otherwise.
 */
static inline bool css_tryget_online(struct cgroup_subsys_state *css)
{
	if (!(css->flags & CSS_NO_REF))
		return percpu_ref_tryget_live(&css->refcnt);
	return true;
}

/**
 * css_put - put a css reference
 * @css: target css
 *
 * Put a reference obtained via css_get() and css_tryget_online().
 */
static inline void css_put(struct cgroup_subsys_state *css)
{
	if (!(css->flags & CSS_NO_REF))
		percpu_ref_put(&css->refcnt);
}

/**
 * css_put_many - put css references
 * @css: target css
 * @n: number of references to put
 *
 * Put references obtained via css_get() and css_tryget_online().
 */
static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n)
{
	if (!(css->flags & CSS_NO_REF))
		percpu_ref_put_many(&css->refcnt, n);
}

/* bits in struct cgroup flags field (bit numbers, not masks) */
enum {
	/* Control Group requires release notifications to userspace */
	CGRP_NOTIFY_ON_RELEASE,
	/*
	 * Clone the parent's configuration when creating a new child
	 * cpuset cgroup.  For historical reasons, this option can be
	 * specified at mount time and thus is implemented here.
	 */
	CGRP_CPUSET_CLONE_CHILDREN,
};

struct cgroup {
	/* self css with NULL ->ss, points back to this cgroup */
	struct cgroup_subsys_state self;

	unsigned long flags;		/* "unsigned long" so bitops work */

	/*
	 * idr allocated in-hierarchy ID.
	 *
	 * ID 0 is not used, the ID of the root cgroup is always 1, and a
	 * new cgroup will be assigned the smallest available ID.
	 *
	 * Allocating/Removing ID must be protected by cgroup_mutex.
	 */
	int id;

	/*
	 * If this cgroup contains any tasks, it contributes one to
	 * populated_cnt.  All children with non-zero populated_cnt of
	 * their own contribute one.  The count is zero iff there's no task
	 * in this cgroup or its subtree.
	 */
	int populated_cnt;

	struct kernfs_node *kn;		/* cgroup kernfs entry */
	struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */

	/*
	 * The bitmask of subsystems enabled on the child cgroups.
	 * ->subtree_control is the one configured through
	 * "cgroup.subtree_control" while ->child_subsys_mask is the
	 * effective one which may have more subsystems enabled.
	 * Controller knobs are made available iff it's enabled in
	 * ->subtree_control.
	 */
	unsigned int subtree_control;
	unsigned int child_subsys_mask;

	/* Private pointers for each registered subsystem */
	struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT];

	struct cgroup_root *root;

	/*
	 * List of cgrp_cset_links pointing at css_sets with tasks in this
	 * cgroup.  Protected by css_set_lock.
	 */
	struct list_head cset_links;

	/*
	 * On the default hierarchy, a css_set for a cgroup with some
	 * subsys disabled will point to css's which are associated with
	 * the closest ancestor which has the subsys enabled.  The
	 * following lists all css_sets which point to this cgroup's css
	 * for the given subsystem.
	 */
	struct list_head e_csets[CGROUP_SUBSYS_COUNT];

	/*
	 * list of pidlists, up to two for each namespace (one for procs, one
	 * for tasks); created on demand.
	 */
	struct list_head pidlists;
	struct mutex pidlist_mutex;

	/* used to wait for offlining of csses */
	wait_queue_head_t offline_waitq;

	/* used to schedule release agent */
	struct work_struct release_agent_work;
};

#define MAX_CGROUP_ROOT_NAMELEN 64

/* cgroup_root->flags */
enum {
	CGRP_ROOT_SANE_BEHAVIOR	= (1 << 0), /* __DEVEL__sane_behavior specified */
	CGRP_ROOT_NOPREFIX	= (1 << 1), /* mounted subsystems have no named prefix */
	CGRP_ROOT_XATTR		= (1 << 2), /* supports extended attributes */
};

/*
 * A cgroup_root represents the root of a cgroup hierarchy, and may be
 * associated with a kernfs_root to form an active hierarchy.  This is
 * internal to cgroup core.  Don't access directly from controllers.
 */
struct cgroup_root {
	struct kernfs_root *kf_root;

	/* The bitmask of subsystems attached to this hierarchy */
	unsigned int subsys_mask;

	/* Unique id for this hierarchy. */
	int hierarchy_id;

	/* The root cgroup.  Root is destroyed on its release. */
	struct cgroup cgrp;

	/* Number of cgroups in the hierarchy, used only for /proc/cgroups */
	atomic_t nr_cgrps;

	/* A list running through the active hierarchies */
	struct list_head root_list;

	/* Hierarchy-specific flags */
	unsigned int flags;

	/* IDs for cgroups in this hierarchy */
	struct idr cgroup_idr;

	/* The path to use for release notifications. */
	char release_agent_path[PATH_MAX];

	/* The name for this hierarchy - may be empty */
	char name[MAX_CGROUP_ROOT_NAMELEN];
};

/*
 * A css_set is a structure holding pointers to a set of
 * cgroup_subsys_state objects.  This saves space in the task struct
 * object and speeds up fork()/exit(), since a single inc/dec and a
 * list_add()/del() can bump the reference count on the entire cgroup
 * set for a task.
 */

struct css_set {

	/* Reference count */
	atomic_t refcount;

	/*
	 * List running through all cgroup groups in the same hash
	 * slot.  Protected by css_set_lock
	 */
	struct hlist_node hlist;

	/*
	 * Lists running through all tasks using this cgroup group.
	 * mg_tasks lists tasks which belong to this cset but are in the
	 * process of being migrated out or in.  Protected by
	 * css_set_rwsem, but, during migration, once tasks are moved to
	 * mg_tasks, it can be read safely while holding cgroup_mutex.
	 */
	struct list_head tasks;
	struct list_head mg_tasks;

	/*
	 * List of cgrp_cset_links pointing at cgroups referenced from this
	 * css_set.  Protected by css_set_lock.
	 */
	struct list_head cgrp_links;

	/* the default cgroup associated with this css_set */
	struct cgroup *dfl_cgrp;

	/*
	 * Set of subsystem states, one for each subsystem.  This array is
	 * immutable after creation apart from the init_css_set during
	 * subsystem registration (at boot time).
	 */
	struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];

	/*
	 * List of csets participating in the on-going migration either as
	 * source or destination.  Protected by cgroup_mutex.
	 */
	struct list_head mg_preload_node;
	struct list_head mg_node;

	/*
	 * If this cset is acting as the source of migration the following
	 * two fields are set.  mg_src_cgrp is the source cgroup of the
	 * on-going migration and mg_dst_cset is the destination cset the
	 * target tasks on this cset should be migrated to.  Protected by
	 * cgroup_mutex.
	 */
	struct cgroup *mg_src_cgrp;
	struct css_set *mg_dst_cset;

	/*
	 * On the default hierarchy, ->subsys[ssid] may point to a css
	 * attached to an ancestor instead of the cgroup this css_set is
	 * associated with.  The following node is anchored at
	 * ->subsys[ssid]->cgroup->e_csets[ssid] and provides a way to
	 * iterate through all css's attached to a given cgroup.
	 */
	struct list_head e_cset_node[CGROUP_SUBSYS_COUNT];

	/* For RCU-protected deletion */
	struct rcu_head rcu_head;
};

/*
 * struct cftype: handler definitions for cgroup control files
 *
 * When reading/writing to a file:
 *	- the cgroup to use is file->f_path.dentry->d_parent->d_fsdata
 *	- the 'cftype' of the file is file->f_path.dentry->d_fsdata
 */

/* cftype->flags */
enum {
	CFTYPE_ONLY_ON_ROOT	= (1 << 0),	/* only create on root cgrp */
	CFTYPE_NOT_ON_ROOT	= (1 << 1),	/* don't create on root cgrp */
	/* NOTE(review): bit (1 << 2) is skipped here - presumably retired
	 * historically; confirm before reusing it. */
	CFTYPE_NO_PREFIX	= (1 << 3),	/* (DON'T USE FOR NEW FILES) no subsys prefix */

	/* internal flags, do not use outside cgroup core proper */
	__CFTYPE_ONLY_ON_DFL	= (1 << 16),	/* only on default hierarchy */
	__CFTYPE_NOT_ON_DFL	= (1 << 17),	/* not on default hierarchy */
};

#define MAX_CFTYPE_NAME		64

struct cftype {
	/*
	 * By convention, the name should begin with the name of the
	 * subsystem, followed by a period.  Zero length string indicates
	 * end of cftype array.
	 */
	char name[MAX_CFTYPE_NAME];
	int private;
	/*
	 * If not 0, file mode is set to this value, otherwise it will
	 * be figured out automatically
	 */
	umode_t mode;

	/*
	 * The maximum length of string, excluding trailing nul, that can
	 * be passed to write.  If < PAGE_SIZE-1, PAGE_SIZE-1 is assumed.
	 */
	size_t max_write_len;

	/* CFTYPE_* flags */
	unsigned int flags;

	/*
	 * Fields used for internal bookkeeping.  Initialized automatically
	 * during registration.
	 */
	struct cgroup_subsys *ss;	/* NULL for cgroup core files */
	struct list_head node;		/* anchored at ss->cfts */
	struct kernfs_ops *kf_ops;

	/*
	 * read_u64() is a shortcut for the common case of returning a
	 * single integer.  Use it in place of read()
	 */
	u64 (*read_u64)(struct cgroup_subsys_state *css, struct cftype *cft);
	/*
	 * read_s64() is a signed version of read_u64()
	 */
	s64 (*read_s64)(struct cgroup_subsys_state *css, struct cftype *cft);

	/* generic seq_file read interface */
	int (*seq_show)(struct seq_file *sf, void *v);

	/* optional ops, implement all or none */
	void *(*seq_start)(struct seq_file *sf, loff_t *ppos);
	void *(*seq_next)(struct seq_file *sf, void *v, loff_t *ppos);
	void (*seq_stop)(struct seq_file *sf, void *v);

	/*
	 * write_u64() is a shortcut for the common case of accepting
	 * a single integer (as parsed by simple_strtoull) from
	 * userspace.  Use in place of write(); return 0 or error.
	 */
	int (*write_u64)(struct cgroup_subsys_state *css, struct cftype *cft,
			 u64 val);
	/*
	 * write_s64() is a signed version of write_u64()
	 */
	int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft,
			 s64 val);

	/*
	 * write() is the generic write callback which maps directly to
	 * kernfs write operation and overrides all other operations.
	 * Maximum write size is determined by ->max_write_len.  Use
	 * of_css/cft() to access the associated css and cft.
	 */
	ssize_t (*write)(struct kernfs_open_file *of,
			 char *buf, size_t nbytes, loff_t off);

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	struct lock_class_key	lockdep_key;
#endif
};

extern struct cgroup_root cgrp_dfl_root;
extern struct css_set init_css_set;

/**
 * cgroup_on_dfl - test whether a cgroup is on the default hierarchy
 * @cgrp: the cgroup of interest
 *
 * The default hierarchy is the v2 interface of cgroup and this function
 * can be used to test whether a cgroup is on the default hierarchy for
 * cases where a subsystem should behave differently depending on the
 * interface version.
 *
 * The set of behaviors which change on the default hierarchy are still
 * being determined and the mount option is prefixed with __DEVEL__.
 *
 * List of changed behaviors:
 *
 * - Mount options "noprefix", "xattr", "clone_children", "release_agent"
 *   and "name" are disallowed.
 *
 * - When mounting an existing superblock, mount options should match.
 *
 * - Remount is disallowed.
 *
 * - rename(2) is disallowed.
 *
 * - "tasks" is removed.  Everything should be at process granularity.  Use
 *   "cgroup.procs" instead.
 *
 * - "cgroup.procs" is not sorted.  pids will be unique unless they got
 *   recycled in between reads.
 *
 * - "release_agent" and "notify_on_release" are removed.  Replacement
 *   notification mechanism will be implemented.
 *
 * - "cgroup.clone_children" is removed.
 *
 * - "cgroup.subtree_populated" is available.  Its value is 0 if the cgroup
 *   and its descendants contain no task; otherwise, 1.  The file also
 *   generates kernfs notification which can be monitored through poll and
 *   [di]notify when the value of the file changes.
 *
 * - cpuset: tasks will be kept in empty cpusets when hotplug happens and
 *   take masks of ancestors with non-empty cpus/mems, instead of being
 *   moved to an ancestor.
 *
 * - cpuset: a task can be moved into an empty cpuset, and again it takes
 *   masks of ancestors.
 *
 * - memcg: use_hierarchy is on by default and the cgroup file for the flag
 *   is not created.
 *
 * - blkcg: blk-throttle becomes properly hierarchical.
 *
 * - debug: disallowed on the default hierarchy.
 */
static inline bool cgroup_on_dfl(const struct cgroup *cgrp)
{
	return cgrp->root == &cgrp_dfl_root;
}

/* no synchronization, the result can only be used as a hint */
static inline bool cgroup_has_tasks(struct cgroup *cgrp)
{
	return !list_empty(&cgrp->cset_links);
}

/* returns ino associated with a cgroup */
static inline ino_t cgroup_ino(struct cgroup *cgrp)
{
	return cgrp->kn->ino;
}

/* cft/css accessors for cftype->write() operation */
static inline struct cftype *of_cft(struct kernfs_open_file *of)
{
	return of->kn->priv;
}

struct cgroup_subsys_state *of_css(struct kernfs_open_file *of);

/* cft/css accessors for cftype->seq_*() operations */
static inline struct cftype *seq_cft(struct seq_file *seq)
{
	return of_cft(seq->private);
}

static inline struct cgroup_subsys_state *seq_css(struct seq_file *seq)
{
	return of_css(seq->private);
}

/*
 * Name / path handling functions.  All are thin wrappers around the kernfs
 * counterparts and can be called under any context.
 */

static inline int cgroup_name(struct cgroup *cgrp, char *buf, size_t buflen)
{
	return kernfs_name(cgrp->kn, buf, buflen);
}

static inline char * __must_check cgroup_path(struct cgroup *cgrp, char *buf,
					      size_t buflen)
{
	return kernfs_path(cgrp->kn, buf, buflen);
}

static inline void pr_cont_cgroup_name(struct cgroup *cgrp)
{
	pr_cont_kernfs_name(cgrp->kn);
}

static inline void pr_cont_cgroup_path(struct cgroup *cgrp)
{
	pr_cont_kernfs_path(cgrp->kn);
}

char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);

int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
int cgroup_rm_cftypes(struct cftype *cfts);

bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor);

/*
 * Control Group taskset, used to pass around set of tasks to cgroup_subsys
 * methods.
 */
struct cgroup_taskset;
struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset);
struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset);

/**
 * cgroup_taskset_for_each - iterate cgroup_taskset
 * @task: the loop cursor
 * @tset: taskset to iterate
 */
#define cgroup_taskset_for_each(task, tset)				\
	for ((task) = cgroup_taskset_first((tset)); (task);		\
	     (task) = cgroup_taskset_next((tset)))

/*
 * Control Group subsystem type.
 * See Documentation/cgroups/cgroups.txt for details
 */

struct cgroup_subsys {
	struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css);
	int (*css_online)(struct cgroup_subsys_state *css);
	void (*css_offline)(struct cgroup_subsys_state *css);
	void (*css_released)(struct cgroup_subsys_state *css);
	void (*css_free)(struct cgroup_subsys_state *css);
	void (*css_reset)(struct cgroup_subsys_state *css);
	void (*css_e_css_changed)(struct cgroup_subsys_state *css);

	int (*can_attach)(struct cgroup_subsys_state *css,
			  struct cgroup_taskset *tset);
	void (*cancel_attach)(struct cgroup_subsys_state *css,
			      struct cgroup_taskset *tset);
	void (*attach)(struct cgroup_subsys_state *css,
		       struct cgroup_taskset *tset);
	void (*fork)(struct task_struct *task);
	void (*exit)(struct cgroup_subsys_state *css,
		     struct cgroup_subsys_state *old_css,
		     struct task_struct *task);
	void (*bind)(struct cgroup_subsys_state *root_css);

	int disabled;
	int early_init;

	/*
	 * If %false, this subsystem is properly hierarchical -
	 * configuration, resource accounting and restriction on a parent
	 * cgroup cover those of its children.  If %true, hierarchy support
	 * is broken in some ways - some subsystems ignore hierarchy
	 * completely while others are only implemented half-way.
	 *
	 * It's now disallowed to create nested cgroups if the subsystem is
	 * broken and cgroup core will emit a warning message on such
	 * cases.  Eventually, all subsystems will be made properly
	 * hierarchical and this will go away.
	 */
	bool broken_hierarchy;
	bool warned_broken_hierarchy;

	/* the following two fields are initialized automatically during boot */
	int id;
#define MAX_CGROUP_TYPE_NAMELEN 32
	const char *name;

	/* link to parent, protected by cgroup_lock() */
	struct cgroup_root *root;

	/* idr for css->id */
	struct idr css_idr;

	/*
	 * List of cftypes.  Each entry is the first entry of an array
	 * terminated by zero length name.
	 */
	struct list_head cfts;

	/*
	 * Base cftypes which are automatically registered.  The two can
	 * point to the same array.
	 */
	struct cftype *dfl_cftypes;	/* for the default hierarchy */
	struct cftype *legacy_cftypes;	/* for the legacy hierarchies */

	/*
	 * A subsystem may depend on other subsystems.  When such subsystem
	 * is enabled on a cgroup, the depended-upon subsystems are enabled
	 * together if available.  Subsystems enabled due to dependency are
	 * not visible to userland until explicitly enabled.  The following
	 * specifies the mask of subsystems that this one depends on.
	 */
	unsigned int depends_on;
};

#define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys;
#include <linux/cgroup_subsys.h>
#undef SUBSYS

/**
 * task_css_set_check - obtain a task's css_set with extra access conditions
 * @task: the task to obtain css_set for
 * @__c: extra condition expression to be passed to rcu_dereference_check()
 *
 * A task's css_set is RCU protected, initialized and exited while holding
 * task_lock(), and can only be modified while holding both cgroup_mutex
 * and task_lock() while the task is alive.  This macro verifies that the
 * caller is inside proper critical section and returns @task's css_set.
 *
 * The caller can also specify additional allowed conditions via @__c, such
 * as locks used during the cgroup_subsys::attach() methods.
 */
#ifdef CONFIG_PROVE_RCU
extern struct mutex cgroup_mutex;
extern struct rw_semaphore css_set_rwsem;
#define task_css_set_check(task, __c)					\
	rcu_dereference_check((task)->cgroups,				\
		lockdep_is_held(&cgroup_mutex) ||			\
		lockdep_is_held(&css_set_rwsem) ||			\
		((task)->flags & PF_EXITING) || (__c))
#else
#define task_css_set_check(task, __c)					\
	rcu_dereference((task)->cgroups)
#endif

/**
 * task_css_check - obtain css for (task, subsys) w/ extra access conds
 * @task: the target task
 * @subsys_id: the target subsystem ID
 * @__c: extra condition expression to be passed to rcu_dereference_check()
 *
 * Return the cgroup_subsys_state for the (@task, @subsys_id) pair.  The
 * synchronization rules are the same as task_css_set_check().
 */
#define task_css_check(task, subsys_id, __c)				\
	task_css_set_check((task), (__c))->subsys[(subsys_id)]

/**
 * task_css_set - obtain a task's css_set
 * @task: the task to obtain css_set for
 *
 * See task_css_set_check().
 */
static inline struct css_set *task_css_set(struct task_struct *task)
{
	return task_css_set_check(task, false);
}

/**
 * task_css - obtain css for (task, subsys)
 * @task: the target task
 * @subsys_id: the target subsystem ID
 *
 * See task_css_check().
 */
static inline struct cgroup_subsys_state *task_css(struct task_struct *task,
						   int subsys_id)
{
	return task_css_check(task, subsys_id, false);
}

/**
 * task_css_is_root - test whether a task belongs to the root css
 * @task: the target task
 * @subsys_id: the target subsystem ID
 *
 * Test whether @task belongs to the root css on the specified subsystem.
 * May be invoked in any context.
 */
static inline bool task_css_is_root(struct task_struct *task, int subsys_id)
{
	return task_css_check(task, subsys_id, true) ==
		init_css_set.subsys[subsys_id];
}

static inline struct cgroup *task_cgroup(struct task_struct *task,
					 int subsys_id)
{
	return task_css(task, subsys_id)->cgroup;
}

struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
					   struct cgroup_subsys_state *parent);

struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss);

/**
 * css_for_each_child - iterate through children of a css
 * @pos: the css * to use as the loop cursor
 * @parent: css whose children to walk
 *
 * Walk @parent's children.  Must be called under rcu_read_lock().
 *
 * If a subsystem synchronizes ->css_online() and the start of iteration, a
 * css which finished ->css_online() is guaranteed to be visible in the
 * future iterations and will stay visible until the last reference is put.
 * A css which hasn't finished ->css_online() or already finished
 * ->css_offline() may show up during traversal.  It's each subsystem's
 * responsibility to synchronize against on/offlining.
 *
 * It is allowed to temporarily drop RCU read lock during iteration.  The
 * caller is responsible for ensuring that @pos remains accessible until
 * the start of the next iteration by, for example, bumping the css refcnt.
 */
#define css_for_each_child(pos, parent)					\
	for ((pos) = css_next_child(NULL, (parent)); (pos);		\
	     (pos) = css_next_child((pos), (parent)))

struct cgroup_subsys_state *
css_next_descendant_pre(struct cgroup_subsys_state *pos,
			struct cgroup_subsys_state *css);

struct cgroup_subsys_state *
css_rightmost_descendant(struct cgroup_subsys_state *pos);

/**
 * css_for_each_descendant_pre - pre-order walk of a css's descendants
 * @pos: the css * to use as the loop cursor
 * @root: css whose descendants to walk
 *
 * Walk @root's descendants.  @root is included in the iteration and the
 * first node to be visited.  Must be called under rcu_read_lock().
 *
 * If a subsystem synchronizes ->css_online() and the start of iteration, a
 * css which finished ->css_online() is guaranteed to be visible in the
 * future iterations and will stay visible until the last reference is put.
 * A css which hasn't finished ->css_online() or already finished
 * ->css_offline() may show up during traversal.  It's each subsystem's
 * responsibility to synchronize against on/offlining.
 *
 * For example, the following guarantees that a descendant can't escape
 * state updates of its ancestors.
 *
 * my_online(@css)
 * {
 *	Lock @css's parent and @css;
 *	Inherit state from the parent;
 *	Unlock both.
 * }
 *
 * my_update_state(@css)
 * {
 *	css_for_each_descendant_pre(@pos, @css) {
 *		Lock @pos;
 *		if (@pos == @css)
 *			Update @css's state;
 *		else
 *			Verify @pos is alive and inherit state from its parent;
 *		Unlock @pos;
 *	}
 * }
 *
 * As long as the inheriting step, including checking the parent state, is
 * enclosed inside @pos locking, double-locking the parent isn't necessary
 * while inheriting.  The state update to the parent is guaranteed to be
 * visible by walking order and, as long as inheriting operations to the
 * same @pos are atomic to each other, multiple updates racing each other
 * still result in the correct state.  It's guaranteed that at least one
 * inheritance happens for any css after the latest update to its parent.
 *
 * If checking parent's state requires locking the parent, each inheriting
 * iteration should lock and unlock both @pos->parent and @pos.
 *
 * Alternatively, a subsystem may choose to use a single global lock to
 * synchronize ->css_online() and ->css_offline() against tree-walking
 * operations.
 *
 * It is allowed to temporarily drop RCU read lock during iteration.  The
 * caller is responsible for ensuring that @pos remains accessible until
 * the start of the next iteration by, for example, bumping the css refcnt.
 */
#define css_for_each_descendant_pre(pos, css)				\
	for ((pos) = css_next_descendant_pre(NULL, (css)); (pos);	\
	     (pos) = css_next_descendant_pre((pos), (css)))

struct cgroup_subsys_state *
css_next_descendant_post(struct cgroup_subsys_state *pos,
			 struct cgroup_subsys_state *css);

/**
 * css_for_each_descendant_post - post-order walk of a css's descendants
 * @pos: the css * to use as the loop cursor
 * @css: css whose descendants to walk
 *
 * Similar to css_for_each_descendant_pre() but performs post-order
 * traversal instead.  @root is included in the iteration and the last
 * node to be visited.
 *
 * If a subsystem synchronizes ->css_online() and the start of iteration, a
 * css which finished ->css_online() is guaranteed to be visible in the
 * future iterations and will stay visible until the last reference is put.
 * A css which hasn't finished ->css_online() or already finished
 * ->css_offline() may show up during traversal.  It's each subsystem's
 * responsibility to synchronize against on/offlining.
 *
 * Note that the walk visibility guarantee example described in pre-order
 * walk doesn't apply the same to post-order walks.
 */
#define css_for_each_descendant_post(pos, css)				\
	for ((pos) = css_next_descendant_post(NULL, (css)); (pos);	\
	     (pos) = css_next_descendant_post((pos), (css)))

bool css_has_online_children(struct cgroup_subsys_state *css);

/* A css_task_iter should be treated as an opaque object */
struct css_task_iter {
	struct cgroup_subsys		*ss;

	struct list_head		*cset_pos;
	struct list_head		*cset_head;

	struct list_head		*task_pos;
	struct list_head		*tasks_head;
	struct list_head		*mg_tasks_head;
};

void css_task_iter_start(struct cgroup_subsys_state *css,
			 struct css_task_iter *it);
struct task_struct *css_task_iter_next(struct css_task_iter *it);
void css_task_iter_end(struct css_task_iter *it);

int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);

struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup,
					     struct cgroup_subsys *ss);
struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
						       struct cgroup_subsys *ss);

#else /* !CONFIG_CGROUPS */

static inline int cgroup_init_early(void) { return 0; }
static inline int cgroup_init(void) { return 0; }
static inline void cgroup_fork(struct task_struct *p) {}
static inline void cgroup_post_fork(struct task_struct *p) {}
static inline void cgroup_exit(struct task_struct *p) {}

static inline int cgroupstats_build(struct cgroupstats *stats,
				    struct dentry *dentry)
{
	return -EINVAL;
}

/* No cgroups - nothing to do */
static inline int cgroup_attach_task_all(struct task_struct *from,
					 struct task_struct *t)
{
	return 0;
}

#endif /* !CONFIG_CGROUPS */

#endif /* _LINUX_CGROUP_H */