Lines Matching +full:cs +full:- +full:out

7  *  Copyright (C) 2004-2007 Silicon Graphics, Inc.
11 * sysfs is Copyright (c) 2001-3 Patrick Mochel
13 * 2003-10-10 Written by Simon Derr.
14 * 2003-10-22 Updates by Stephen Hemminger.
15 * 2004 May-July Rework by Paul Jackson.
24 #include "cgroup-internal.h"
53 * node binding, add this key to provide a quick low-cost judgment
99 * The user-configured masks can only be changed by writing to
113 * The user-configured masks are always the same with effective masks.
116 /* user-configured CPUs and Memory Nodes allow to tasks */
125 * CPUs allocated to child sub-partitions (default hierarchy only)
126 * - CPUs granted by the parent = effective_cpus U subparts_cpus
127 * - effective_cpus and subparts_cpus are mutually exclusive.
137 * - top_cpuset.old_mems_allowed is initialized to mems_allowed.
138 * - A new cpuset's old_mems_allowed is initialized when some
140 * - old_mems_allowed is used in cpuset_migrate_mm() when we change
150 * zeroing cpus/mems_allowed between ->can_attach() and ->attach().
168 * use_parent_ecpus - set if using parent's effective_cpus
169 * child_ecpus_count - # of children with use_parent_ecpus set
192 * 0 - member (not a partition root)
193 * 1 - partition root
194 * 2 - partition root without load balancing (isolated)
195 * -1 - invalid partition root
196 * -2 - invalid isolated partition root
201 #define PRS_INVALID_ROOT -1
202 #define PRS_INVALID_ISOLATED -2
229 static inline struct cpuset *parent_cs(struct cpuset *cs) in parent_cs() argument
231 return css_cs(cs->css.parent); in parent_cs()
236 struct cpuset *cs = task_cs(p); in inc_dl_tasks_cs() local
238 cs->nr_deadline_tasks++; in inc_dl_tasks_cs()
243 struct cpuset *cs = task_cs(p); in dec_dl_tasks_cs() local
245 cs->nr_deadline_tasks--; in dec_dl_tasks_cs()
261 static inline bool is_cpuset_online(struct cpuset *cs) in is_cpuset_online() argument
263 return test_bit(CS_ONLINE, &cs->flags) && !css_is_dying(&cs->css); in is_cpuset_online()
266 static inline int is_cpu_exclusive(const struct cpuset *cs) in is_cpu_exclusive() argument
268 return test_bit(CS_CPU_EXCLUSIVE, &cs->flags); in is_cpu_exclusive()
271 static inline int is_mem_exclusive(const struct cpuset *cs) in is_mem_exclusive() argument
273 return test_bit(CS_MEM_EXCLUSIVE, &cs->flags); in is_mem_exclusive()
276 static inline int is_mem_hardwall(const struct cpuset *cs) in is_mem_hardwall() argument
278 return test_bit(CS_MEM_HARDWALL, &cs->flags); in is_mem_hardwall()
281 static inline int is_sched_load_balance(const struct cpuset *cs) in is_sched_load_balance() argument
283 return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); in is_sched_load_balance()
286 static inline int is_memory_migrate(const struct cpuset *cs) in is_memory_migrate() argument
288 return test_bit(CS_MEMORY_MIGRATE, &cs->flags); in is_memory_migrate()
291 static inline int is_spread_page(const struct cpuset *cs) in is_spread_page() argument
293 return test_bit(CS_SPREAD_PAGE, &cs->flags); in is_spread_page()
296 static inline int is_spread_slab(const struct cpuset *cs) in is_spread_slab() argument
298 return test_bit(CS_SPREAD_SLAB, &cs->flags); in is_spread_slab()
301 static inline int is_partition_valid(const struct cpuset *cs) in is_partition_valid() argument
303 return cs->partition_root_state > 0; in is_partition_valid()
306 static inline int is_partition_invalid(const struct cpuset *cs) in is_partition_invalid() argument
308 return cs->partition_root_state < 0; in is_partition_invalid()
314 static inline void make_partition_invalid(struct cpuset *cs) in make_partition_invalid() argument
316 if (is_partition_valid(cs)) in make_partition_invalid()
317 cs->partition_root_state = -cs->partition_root_state; in make_partition_invalid()
323 static inline void notify_partition_change(struct cpuset *cs, int old_prs) in notify_partition_change() argument
325 if (old_prs == cs->partition_root_state) in notify_partition_change()
327 cgroup_file_notify(&cs->partition_file); in notify_partition_change()
330 if (is_partition_valid(cs)) in notify_partition_change()
331 WRITE_ONCE(cs->prs_err, PERR_NONE); in notify_partition_change()
341 * cpuset_for_each_child - traverse online children of a cpuset
350 css_for_each_child((pos_css), &(parent_cs)->css) \
354 * cpuset_for_each_descendant_pre - pre-order walk of a cpuset's descendants
365 css_for_each_descendant_pre((pos_css), &(root_cs)->css) \
369 * There are two global locks guarding cpuset structures - cpuset_mutex and
375 * paths that rely on priority inheritance (e.g. scheduler - on RT) for
392 * If a task is only holding callback_lock, then it has read-only
400 * small pieces of code, such as when reading out possibly multi-word
453 (cpuset_cgrp_subsys.root->flags & CGRP_ROOT_CPUSET_V2_MODE); in is_in_v2_mode()
457 * partition_is_populated - check if partition has tasks
458 * @cs: partition root to be checked
462 * It is assumed that @cs is a valid partition root. @excluded_child should
463 * be non-NULL when this cpuset is going to become a partition itself.
465 static inline bool partition_is_populated(struct cpuset *cs, in partition_is_populated() argument
471 if (cs->css.cgroup->nr_populated_csets) in partition_is_populated()
473 if (!excluded_child && !cs->nr_subparts_cpus) in partition_is_populated()
474 return cgroup_is_populated(cs->css.cgroup); in partition_is_populated()
477 cpuset_for_each_child(child, css, cs) { in partition_is_populated()
482 if (cgroup_is_populated(child->css.cgroup)) { in partition_is_populated()
497 * One way or another, we guarantee to return some non-empty subset
506 struct cpuset *cs; in guarantee_online_cpus() local
512 cs = task_cs(tsk); in guarantee_online_cpus()
514 while (!cpumask_intersects(cs->effective_cpus, pmask)) { in guarantee_online_cpus()
515 cs = parent_cs(cs); in guarantee_online_cpus()
516 if (unlikely(!cs)) { in guarantee_online_cpus()
527 cpumask_and(pmask, pmask, cs->effective_cpus); in guarantee_online_cpus()
539 * One way or another, we guarantee to return some non-empty subset
544 static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask) in guarantee_online_mems() argument
546 while (!nodes_intersects(cs->effective_mems, node_states[N_MEMORY])) in guarantee_online_mems()
547 cs = parent_cs(cs); in guarantee_online_mems()
548 nodes_and(*pmask, cs->effective_mems, node_states[N_MEMORY]); in guarantee_online_mems()
557 static void cpuset_update_task_spread_flags(struct cpuset *cs, in cpuset_update_task_spread_flags() argument
563 if (is_spread_page(cs)) in cpuset_update_task_spread_flags()
568 if (is_spread_slab(cs)) in cpuset_update_task_spread_flags()
575 * is_cpuset_subset(p, q) - Is cpuset p a subset of cpuset q?
584 return cpumask_subset(p->cpus_allowed, q->cpus_allowed) && in is_cpuset_subset()
585 nodes_subset(p->mems_allowed, q->mems_allowed) && in is_cpuset_subset()
591 * alloc_cpumasks - allocate three cpumasks for cpuset
592 * @cs: the cpuset that have cpumasks to be allocated.
594 * Return: 0 if successful, -ENOMEM otherwise.
596 * Only one of the two input arguments should be non-NULL.
598 static inline int alloc_cpumasks(struct cpuset *cs, struct tmpmasks *tmp) in alloc_cpumasks() argument
602 if (cs) { in alloc_cpumasks()
603 pmask1 = &cs->cpus_allowed; in alloc_cpumasks()
604 pmask2 = &cs->effective_cpus; in alloc_cpumasks()
605 pmask3 = &cs->subparts_cpus; in alloc_cpumasks()
607 pmask1 = &tmp->new_cpus; in alloc_cpumasks()
608 pmask2 = &tmp->addmask; in alloc_cpumasks()
609 pmask3 = &tmp->delmask; in alloc_cpumasks()
613 return -ENOMEM; in alloc_cpumasks()
627 return -ENOMEM; in alloc_cpumasks()
631 * free_cpumasks - free cpumasks in a tmpmasks structure
632 * @cs: the cpuset that have cpumasks to be free.
635 static inline void free_cpumasks(struct cpuset *cs, struct tmpmasks *tmp) in free_cpumasks() argument
637 if (cs) { in free_cpumasks()
638 free_cpumask_var(cs->cpus_allowed); in free_cpumasks()
639 free_cpumask_var(cs->effective_cpus); in free_cpumasks()
640 free_cpumask_var(cs->subparts_cpus); in free_cpumasks()
643 free_cpumask_var(tmp->new_cpus); in free_cpumasks()
644 free_cpumask_var(tmp->addmask); in free_cpumasks()
645 free_cpumask_var(tmp->delmask); in free_cpumasks()
650 * alloc_trial_cpuset - allocate a trial cpuset
651 * @cs: the cpuset that the trial cpuset duplicates
653 static struct cpuset *alloc_trial_cpuset(struct cpuset *cs) in alloc_trial_cpuset() argument
657 trial = kmemdup(cs, sizeof(*cs), GFP_KERNEL); in alloc_trial_cpuset()
666 cpumask_copy(trial->cpus_allowed, cs->cpus_allowed); in alloc_trial_cpuset()
667 cpumask_copy(trial->effective_cpus, cs->effective_cpus); in alloc_trial_cpuset()
672 * free_cpuset - free the cpuset
673 * @cs: the cpuset to be freed
675 static inline void free_cpuset(struct cpuset *cs) in free_cpuset() argument
677 free_cpumasks(cs, NULL); in free_cpuset()
678 kfree(cs); in free_cpuset()
682 * validate_change_legacy() - Validate conditions specific to legacy (v1)
694 ret = -EBUSY; in validate_change_legacy()
697 goto out; in validate_change_legacy()
700 ret = -EACCES; in validate_change_legacy()
703 goto out; in validate_change_legacy()
706 out: in validate_change_legacy()
711 * validate_change() - Used to validate that any proposed cpuset change
719 * 'cur' is the address of an actual, in-use cpuset. Operations
727 * Return 0 if valid, -errno if not.
741 goto out; in validate_change()
745 goto out; in validate_change()
750 * Cpusets with tasks - existing or newly being attached - can't in validate_change()
753 ret = -ENOSPC; in validate_change()
754 if ((cgroup_is_populated(cur->css.cgroup) || cur->attach_in_progress)) { in validate_change()
755 if (!cpumask_empty(cur->cpus_allowed) && in validate_change()
756 cpumask_empty(trial->cpus_allowed)) in validate_change()
757 goto out; in validate_change()
758 if (!nodes_empty(cur->mems_allowed) && in validate_change()
759 nodes_empty(trial->mems_allowed)) in validate_change()
760 goto out; in validate_change()
767 ret = -EBUSY; in validate_change()
769 !cpuset_cpumask_can_shrink(cur->cpus_allowed, in validate_change()
770 trial->cpus_allowed)) in validate_change()
771 goto out; in validate_change()
777 ret = -EINVAL; in validate_change()
781 cpumask_intersects(trial->cpus_allowed, c->cpus_allowed)) in validate_change()
782 goto out; in validate_change()
785 nodes_intersects(trial->mems_allowed, c->mems_allowed)) in validate_change()
786 goto out; in validate_change()
790 out: in validate_change()
802 return cpumask_intersects(a->effective_cpus, b->effective_cpus); in cpusets_overlap()
808 if (dattr->relax_domain_level < c->relax_domain_level) in update_domain_attr()
809 dattr->relax_domain_level = c->relax_domain_level; in update_domain_attr()
822 if (cpumask_empty(cp->cpus_allowed)) { in update_domain_attr_tree()
836 /* jump label reference count + the top-level cpuset */ in nr_cpusets()
844 * A 'partial partition' is a set of non-overlapping subsets whose
851 * See "What is sched_load_balance" in Documentation/admin-guide/cgroup-v1/cpusets.rst
862 * cp - cpuset pointer, used (together with pos_css) to perform a
863 * top-down scan of all cpusets. For our purposes, rebuilding
866 * csa - (for CpuSet Array) Array of pointers to all the cpusets
873 * doms - Conversion of 'csa' to an array of cpumasks, for passing to
896 struct cpuset *cp; /* top-down scan of cpusets */ in generate_sched_domains()
948 * If root is load-balancing, we can skip @cp if it in generate_sched_domains()
951 if (!cpumask_empty(cp->cpus_allowed) && in generate_sched_domains()
953 cpumask_intersects(cp->cpus_allowed, in generate_sched_domains()
958 cpumask_subset(cp->cpus_allowed, top_cpuset.effective_cpus)) in generate_sched_domains()
962 !cpumask_empty(cp->effective_cpus)) in generate_sched_domains()
972 csa[i]->pn = i; in generate_sched_domains()
979 int apn = a->pn; in generate_sched_domains()
983 int bpn = b->pn; in generate_sched_domains()
989 if (c->pn == bpn) in generate_sched_domains()
990 c->pn = apn; in generate_sched_domains()
992 ndoms--; /* one less element */ in generate_sched_domains()
1016 int apn = a->pn; in generate_sched_domains()
1030 warnings--; in generate_sched_domains()
1041 if (apn == b->pn) { in generate_sched_domains()
1042 cpumask_or(dp, dp, b->effective_cpus); in generate_sched_domains()
1048 b->pn = -1; in generate_sched_domains()
1070 static void dl_update_tasks_root_domain(struct cpuset *cs) in dl_update_tasks_root_domain() argument
1075 if (cs->nr_deadline_tasks == 0) in dl_update_tasks_root_domain()
1078 css_task_iter_start(&cs->css, 0, &it); in dl_update_tasks_root_domain()
1088 struct cpuset *cs = NULL; in dl_rebuild_rd_accounting() local
1103 cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) { in dl_rebuild_rd_accounting()
1105 if (cpumask_empty(cs->effective_cpus)) { in dl_rebuild_rd_accounting()
1110 css_get(&cs->css); in dl_rebuild_rd_accounting()
1114 dl_update_tasks_root_domain(cs); in dl_rebuild_rd_accounting()
1117 css_put(&cs->css); in dl_rebuild_rd_accounting()
1135 * If the flag 'sched_load_balance' of any cpuset with non-empty
1137 * which has that flag enabled, or if any cpuset with a non-empty
1148 struct cpuset *cs; in rebuild_sched_domains_locked() local
1174 cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) { in rebuild_sched_domains_locked()
1175 if (!is_partition_valid(cs)) { in rebuild_sched_domains_locked()
1179 if (!cpumask_subset(cs->effective_cpus, in rebuild_sched_domains_locked()
1210 * update_tasks_cpumask - Update the cpumasks of tasks in the cpuset.
1211 * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed
1214 * Iterate through each task of @cs updating its cpus_allowed to the
1220 static void update_tasks_cpumask(struct cpuset *cs, struct cpumask *new_cpus) in update_tasks_cpumask() argument
1224 bool top_cs = cs == &top_cpuset; in update_tasks_cpumask()
1226 css_task_iter_start(&cs->css, 0, &it); in update_tasks_cpumask()
1236 cpumask_andnot(new_cpus, possible_mask, cs->subparts_cpus); in update_tasks_cpumask()
1238 cpumask_and(new_cpus, possible_mask, cs->effective_cpus); in update_tasks_cpumask()
1246 * compute_effective_cpumask - Compute the effective cpumask of the cpuset
1248 * @cs: the cpuset the need to recompute the new effective_cpus mask
1254 * to mask those out.
1257 struct cpuset *cs, struct cpuset *parent) in compute_effective_cpumask() argument
1259 if (parent->nr_subparts_cpus && is_partition_valid(cs)) { in compute_effective_cpumask()
1260 cpumask_or(new_cpus, parent->effective_cpus, in compute_effective_cpumask()
1261 parent->subparts_cpus); in compute_effective_cpumask()
1262 cpumask_and(new_cpus, new_cpus, cs->cpus_allowed); in compute_effective_cpumask()
1265 cpumask_and(new_cpus, cs->cpus_allowed, parent->effective_cpus); in compute_effective_cpumask()
1279 static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
1281 static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs,
1289 static int update_partition_exclusive(struct cpuset *cs, int new_prs) in update_partition_exclusive() argument
1293 if (exclusive && !is_cpu_exclusive(cs)) { in update_partition_exclusive()
1294 if (update_flag(CS_CPU_EXCLUSIVE, cs, 1)) in update_partition_exclusive()
1296 } else if (!exclusive && is_cpu_exclusive(cs)) { in update_partition_exclusive()
1298 update_flag(CS_CPU_EXCLUSIVE, cs, 0); in update_partition_exclusive()
1310 static void update_partition_sd_lb(struct cpuset *cs, int old_prs) in update_partition_sd_lb() argument
1312 int new_prs = cs->partition_root_state; in update_partition_sd_lb()
1317 * If cs is not a valid partition root, the load balance state in update_partition_sd_lb()
1323 new_lb = is_sched_load_balance(parent_cs(cs)); in update_partition_sd_lb()
1325 if (new_lb != !!is_sched_load_balance(cs)) { in update_partition_sd_lb()
1328 set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); in update_partition_sd_lb()
1330 clear_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); in update_partition_sd_lb()
1338 * update_parent_subparts_cpumask - update subparts_cpus mask of parent cpuset
1339 * @cs: The cpuset that requests change in partition root state
1345 * For partcmd_enable, the cpuset is being transformed from a non-partition
1352 * root back to a non-partition root. Any CPUs in cpus_allowed that are in
1375 static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd, in update_parent_subparts_cpumask() argument
1379 struct cpuset *parent = parent_cs(cs); in update_parent_subparts_cpumask()
1396 if (!newmask && cpumask_empty(cs->cpus_allowed)) in update_parent_subparts_cpumask()
1404 old_prs = new_prs = cs->partition_root_state; in update_parent_subparts_cpumask()
1410 if (!cpumask_intersects(cs->cpus_allowed, parent->cpus_allowed)) in update_parent_subparts_cpumask()
1417 if (cpumask_subset(parent->effective_cpus, cs->cpus_allowed) && in update_parent_subparts_cpumask()
1418 partition_is_populated(parent, cs)) in update_parent_subparts_cpumask()
1421 cpumask_copy(tmp->addmask, cs->cpus_allowed); in update_parent_subparts_cpumask()
1429 cpumask_and(tmp->delmask, cs->cpus_allowed, in update_parent_subparts_cpumask()
1430 parent->subparts_cpus); in update_parent_subparts_cpumask()
1439 deleting = cpumask_and(tmp->delmask, cs->cpus_allowed, in update_parent_subparts_cpumask()
1440 parent->subparts_cpus); in update_parent_subparts_cpumask()
1442 new_prs = -old_prs; in update_parent_subparts_cpumask()
1451 * delmask = cpus_allowed & ~newmask & parent->subparts_cpus in update_parent_subparts_cpumask()
1452 * addmask = newmask & parent->cpus_allowed in update_parent_subparts_cpumask()
1453 * & ~parent->subparts_cpus in update_parent_subparts_cpumask()
1455 cpumask_andnot(tmp->delmask, cs->cpus_allowed, newmask); in update_parent_subparts_cpumask()
1456 deleting = cpumask_and(tmp->delmask, tmp->delmask, in update_parent_subparts_cpumask()
1457 parent->subparts_cpus); in update_parent_subparts_cpumask()
1459 cpumask_and(tmp->addmask, newmask, parent->cpus_allowed); in update_parent_subparts_cpumask()
1460 adding = cpumask_andnot(tmp->addmask, tmp->addmask, in update_parent_subparts_cpumask()
1461 parent->subparts_cpus); in update_parent_subparts_cpumask()
1472 cpumask_subset(parent->effective_cpus, tmp->addmask) && in update_parent_subparts_cpumask()
1473 !cpumask_intersects(tmp->delmask, cpu_active_mask) && in update_parent_subparts_cpumask()
1474 partition_is_populated(parent, cs)) { in update_parent_subparts_cpumask()
1477 deleting = cpumask_and(tmp->delmask, cs->cpus_allowed, in update_parent_subparts_cpumask()
1478 parent->subparts_cpus); in update_parent_subparts_cpumask()
1484 * delmask = cpus_allowed & parent->subparts_cpus in update_parent_subparts_cpumask()
1485 * addmask = cpus_allowed & parent->cpus_allowed in update_parent_subparts_cpumask()
1486 * & ~parent->subparts_cpus in update_parent_subparts_cpumask()
1495 * out any CPUs. in update_parent_subparts_cpumask()
1497 * to be distributed out. in update_parent_subparts_cpumask()
1499 cpumask_and(tmp->addmask, cs->cpus_allowed, in update_parent_subparts_cpumask()
1500 parent->cpus_allowed); in update_parent_subparts_cpumask()
1501 adding = cpumask_andnot(tmp->addmask, tmp->addmask, in update_parent_subparts_cpumask()
1502 parent->subparts_cpus); in update_parent_subparts_cpumask()
1504 if ((is_partition_valid(cs) && !parent->nr_subparts_cpus) || in update_parent_subparts_cpumask()
1506 cpumask_subset(parent->effective_cpus, tmp->addmask) && in update_parent_subparts_cpumask()
1507 partition_is_populated(parent, cs))) { in update_parent_subparts_cpumask()
1512 if (part_error && is_partition_valid(cs) && in update_parent_subparts_cpumask()
1513 parent->nr_subparts_cpus) in update_parent_subparts_cpumask()
1514 deleting = cpumask_and(tmp->delmask, cs->cpus_allowed, in update_parent_subparts_cpumask()
1515 parent->subparts_cpus); in update_parent_subparts_cpumask()
1518 WRITE_ONCE(cs->prs_err, part_error); in update_parent_subparts_cpumask()
1525 switch (cs->partition_root_state) { in update_parent_subparts_cpumask()
1529 new_prs = -old_prs; in update_parent_subparts_cpumask()
1534 new_prs = -old_prs; in update_parent_subparts_cpumask()
1547 int err = update_partition_exclusive(cs, new_prs); in update_parent_subparts_cpumask()
1560 cpumask_or(parent->subparts_cpus, in update_parent_subparts_cpumask()
1561 parent->subparts_cpus, tmp->addmask); in update_parent_subparts_cpumask()
1562 cpumask_andnot(parent->effective_cpus, in update_parent_subparts_cpumask()
1563 parent->effective_cpus, tmp->addmask); in update_parent_subparts_cpumask()
1566 cpumask_andnot(parent->subparts_cpus, in update_parent_subparts_cpumask()
1567 parent->subparts_cpus, tmp->delmask); in update_parent_subparts_cpumask()
1571 cpumask_and(tmp->delmask, tmp->delmask, cpu_active_mask); in update_parent_subparts_cpumask()
1572 cpumask_or(parent->effective_cpus, in update_parent_subparts_cpumask()
1573 parent->effective_cpus, tmp->delmask); in update_parent_subparts_cpumask()
1576 parent->nr_subparts_cpus = cpumask_weight(parent->subparts_cpus); in update_parent_subparts_cpumask()
1579 cs->partition_root_state = new_prs; in update_parent_subparts_cpumask()
1584 update_tasks_cpumask(parent, tmp->addmask); in update_parent_subparts_cpumask()
1585 if (parent->child_ecpus_count) in update_parent_subparts_cpumask()
1586 update_sibling_cpumasks(parent, cs, tmp); in update_parent_subparts_cpumask()
1596 update_partition_sd_lb(cs, old_prs); in update_parent_subparts_cpumask()
1600 notify_partition_change(cs, old_prs); in update_parent_subparts_cpumask()
1611 * update_cpumasks_hier - Update effective cpumasks and tasks in the subtree
1612 * @cs: the cpuset to consider
1623 static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp, in update_cpumasks_hier() argument
1632 cpuset_for_each_descendant_pre(cp, pos_css, cs) { in update_cpumasks_hier()
1636 compute_effective_cpumask(tmp->new_cpus, cp, parent); in update_cpumasks_hier()
1642 * out all its CPUs. in update_cpumasks_hier()
1644 if (is_in_v2_mode() && cpumask_empty(tmp->new_cpus)) { in update_cpumasks_hier()
1646 cpumask_equal(cp->cpus_allowed, cp->subparts_cpus)) in update_cpumasks_hier()
1649 cpumask_copy(tmp->new_cpus, parent->effective_cpus); in update_cpumasks_hier()
1650 if (!cp->use_parent_ecpus) { in update_cpumasks_hier()
1651 cp->use_parent_ecpus = true; in update_cpumasks_hier()
1652 parent->child_ecpus_count++; in update_cpumasks_hier()
1654 } else if (cp->use_parent_ecpus) { in update_cpumasks_hier()
1655 cp->use_parent_ecpus = false; in update_cpumasks_hier()
1656 WARN_ON_ONCE(!parent->child_ecpus_count); in update_cpumasks_hier()
1657 parent->child_ecpus_count--; in update_cpumasks_hier()
1667 if (!cp->partition_root_state && !(flags & HIER_CHECKALL) && in update_cpumasks_hier()
1668 cpumask_equal(tmp->new_cpus, cp->effective_cpus) && in update_cpumasks_hier()
1678 * for cs already in update_cpumask(). We should also call in update_cpumasks_hier()
1682 old_prs = new_prs = cp->partition_root_state; in update_cpumasks_hier()
1683 if ((cp != cs) && old_prs) { in update_cpumasks_hier()
1684 switch (parent->partition_root_state) { in update_cpumasks_hier()
1697 new_prs = -cp->partition_root_state; in update_cpumasks_hier()
1698 WRITE_ONCE(cp->prs_err, in update_cpumasks_hier()
1705 if (!css_tryget_online(&cp->css)) in update_cpumasks_hier()
1716 new_prs = cp->partition_root_state; in update_cpumasks_hier()
1721 if (cp->nr_subparts_cpus && !is_partition_valid(cp)) { in update_cpumasks_hier()
1725 cpumask_or(tmp->new_cpus, tmp->new_cpus, in update_cpumasks_hier()
1726 cp->subparts_cpus); in update_cpumasks_hier()
1727 cpumask_and(tmp->new_cpus, tmp->new_cpus, in update_cpumasks_hier()
1729 cp->nr_subparts_cpus = 0; in update_cpumasks_hier()
1730 cpumask_clear(cp->subparts_cpus); in update_cpumasks_hier()
1733 cpumask_copy(cp->effective_cpus, tmp->new_cpus); in update_cpumasks_hier()
1734 if (cp->nr_subparts_cpus) { in update_cpumasks_hier()
1739 cpumask_andnot(cp->effective_cpus, cp->effective_cpus, in update_cpumasks_hier()
1740 cp->subparts_cpus); in update_cpumasks_hier()
1743 cp->partition_root_state = new_prs; in update_cpumasks_hier()
1749 !cpumask_equal(cp->cpus_allowed, cp->effective_cpus)); in update_cpumasks_hier()
1751 update_tasks_cpumask(cp, tmp->new_cpus); in update_cpumasks_hier()
1762 set_bit(CS_SCHED_LOAD_BALANCE, &cp->flags); in update_cpumasks_hier()
1764 clear_bit(CS_SCHED_LOAD_BALANCE, &cp->flags); in update_cpumasks_hier()
1768 * On legacy hierarchy, if the effective cpumask of any non- in update_cpumasks_hier()
1773 if (!cpumask_empty(cp->cpus_allowed) && in update_cpumasks_hier()
1780 css_put(&cp->css); in update_cpumasks_hier()
1789 * update_sibling_cpumasks - Update siblings cpumasks
1791 * @cs: Current cpuset
1794 static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs, in update_sibling_cpumasks() argument
1814 if (sibling == cs) in update_sibling_cpumasks()
1816 if (!sibling->use_parent_ecpus) in update_sibling_cpumasks()
1818 if (!css_tryget_online(&sibling->css)) in update_sibling_cpumasks()
1824 css_put(&sibling->css); in update_sibling_cpumasks()
1830 * update_cpumask - update the cpus_allowed mask of a cpuset and all tasks in it
1831 * @cs: the cpuset to consider
1835 static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, in update_cpumask() argument
1841 int old_prs = cs->partition_root_state; in update_cpumask()
1843 /* top_cpuset.cpus_allowed tracks cpu_online_mask; it's read-only */ in update_cpumask()
1844 if (cs == &top_cpuset) in update_cpumask()
1845 return -EACCES; in update_cpumask()
1854 cpumask_clear(trialcs->cpus_allowed); in update_cpumask()
1856 retval = cpulist_parse(buf, trialcs->cpus_allowed); in update_cpumask()
1860 if (!cpumask_subset(trialcs->cpus_allowed, in update_cpumask()
1862 return -EINVAL; in update_cpumask()
1866 if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed)) in update_cpumask()
1870 return -ENOMEM; in update_cpumask()
1872 retval = validate_change(cs, trialcs); in update_cpumask()
1874 if ((retval == -EINVAL) && cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) { in update_cpumask()
1879 * The -EINVAL error code indicates that partition sibling in update_cpumask()
1887 parent = parent_cs(cs); in update_cpumask()
1890 cpumask_intersects(trialcs->cpus_allowed, cp->cpus_allowed)) { in update_cpumask()
1901 if (cs->partition_root_state) { in update_cpumask()
1903 update_parent_subparts_cpumask(cs, partcmd_invalidate, in update_cpumask()
1906 update_parent_subparts_cpumask(cs, partcmd_update, in update_cpumask()
1907 trialcs->cpus_allowed, &tmp); in update_cpumask()
1910 compute_effective_cpumask(trialcs->effective_cpus, trialcs, in update_cpumask()
1911 parent_cs(cs)); in update_cpumask()
1913 cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); in update_cpumask()
1920 if (cs->nr_subparts_cpus) { in update_cpumask()
1921 if (!is_partition_valid(cs) || in update_cpumask()
1922 (cpumask_subset(trialcs->effective_cpus, cs->subparts_cpus) && in update_cpumask()
1923 partition_is_populated(cs, NULL))) { in update_cpumask()
1924 cs->nr_subparts_cpus = 0; in update_cpumask()
1925 cpumask_clear(cs->subparts_cpus); in update_cpumask()
1927 cpumask_and(cs->subparts_cpus, cs->subparts_cpus, in update_cpumask()
1928 cs->cpus_allowed); in update_cpumask()
1929 cs->nr_subparts_cpus = cpumask_weight(cs->subparts_cpus); in update_cpumask()
1935 update_cpumasks_hier(cs, &tmp, 0); in update_cpumask()
1937 if (cs->partition_root_state) { in update_cpumask()
1938 struct cpuset *parent = parent_cs(cs); in update_cpumask()
1944 if (parent->child_ecpus_count) in update_cpumask()
1945 update_sibling_cpumasks(parent, cs, &tmp); in update_cpumask()
1948 update_partition_sd_lb(cs, old_prs); in update_cpumask()
1976 do_migrate_pages(mwork->mm, &mwork->from, &mwork->to, MPOL_MF_MOVE_ALL); in cpuset_migrate_mm_workfn()
1977 mmput(mwork->mm); in cpuset_migrate_mm_workfn()
1993 mwork->mm = mm; in cpuset_migrate_mm()
1994 mwork->from = *from; in cpuset_migrate_mm()
1995 mwork->to = *to; in cpuset_migrate_mm()
1996 INIT_WORK(&mwork->work, cpuset_migrate_mm_workfn); in cpuset_migrate_mm()
1997 queue_work(cpuset_migrate_mm_wq, &mwork->work); in cpuset_migrate_mm()
2009 * cpuset_change_task_nodemask - change task's mems_allowed and mempolicy
2013 * We use the mems_allowed_seq seqlock to safely update both tsk->mems_allowed
2024 write_seqcount_begin(&tsk->mems_allowed_seq); in cpuset_change_task_nodemask()
2026 nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems); in cpuset_change_task_nodemask()
2028 tsk->mems_allowed = *newmems; in cpuset_change_task_nodemask()
2030 write_seqcount_end(&tsk->mems_allowed_seq); in cpuset_change_task_nodemask()
2039 * update_tasks_nodemask - Update the nodemasks of tasks in the cpuset.
2040 * @cs: the cpuset in which each task's mems_allowed mask needs to be changed
2042 * Iterate through each task of @cs updating its mems_allowed to the
2046 static void update_tasks_nodemask(struct cpuset *cs) in update_tasks_nodemask() argument
2052 cpuset_being_rebound = cs; /* causes mpol_dup() rebind */ in update_tasks_nodemask()
2054 guarantee_online_mems(cs, &newmems); in update_tasks_nodemask()
2058 * take while holding tasklist_lock. Forks can happen - the in update_tasks_nodemask()
2066 css_task_iter_start(&cs->css, 0, &it); in update_tasks_nodemask()
2077 migrate = is_memory_migrate(cs); in update_tasks_nodemask()
2079 mpol_rebind_mm(mm, &cs->mems_allowed); in update_tasks_nodemask()
2081 cpuset_migrate_mm(mm, &cs->old_mems_allowed, &newmems); in update_tasks_nodemask()
2089 * cs->old_mems_allowed. in update_tasks_nodemask()
2091 cs->old_mems_allowed = newmems; in update_tasks_nodemask()
2098 * update_nodemasks_hier - Update effective nodemasks and tasks in the subtree
2099 * @cs: the cpuset to consider
2109 static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) in update_nodemasks_hier() argument
2115 cpuset_for_each_descendant_pre(cp, pos_css, cs) { in update_nodemasks_hier()
2118 nodes_and(*new_mems, cp->mems_allowed, parent->effective_mems); in update_nodemasks_hier()
2125 *new_mems = parent->effective_mems; in update_nodemasks_hier()
2128 if (nodes_equal(*new_mems, cp->effective_mems)) { in update_nodemasks_hier()
2133 if (!css_tryget_online(&cp->css)) in update_nodemasks_hier()
2138 cp->effective_mems = *new_mems; in update_nodemasks_hier()
2142 !nodes_equal(cp->mems_allowed, cp->effective_mems)); in update_nodemasks_hier()
2147 css_put(&cp->css); in update_nodemasks_hier()
2161 * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
2162 * lock each such tasks mm->mmap_lock, scan its vma's and rebind
2165 static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, in update_nodemask() argument
2172 * it's read-only in update_nodemask()
2174 if (cs == &top_cpuset) { in update_nodemask()
2175 retval = -EACCES; in update_nodemask()
2186 nodes_clear(trialcs->mems_allowed); in update_nodemask()
2188 retval = nodelist_parse(buf, trialcs->mems_allowed); in update_nodemask()
2192 if (!nodes_subset(trialcs->mems_allowed, in update_nodemask()
2194 retval = -EINVAL; in update_nodemask()
2199 if (nodes_equal(cs->mems_allowed, trialcs->mems_allowed)) { in update_nodemask()
2200 retval = 0; /* Too easy - nothing to do */ in update_nodemask()
2203 retval = validate_change(cs, trialcs); in update_nodemask()
2207 check_insane_mems_config(&trialcs->mems_allowed); in update_nodemask()
2210 cs->mems_allowed = trialcs->mems_allowed; in update_nodemask()
2213 /* use trialcs->mems_allowed as a temp variable */ in update_nodemask()
2214 update_nodemasks_hier(cs, &trialcs->mems_allowed); in update_nodemask()
2230 static int update_relax_domain_level(struct cpuset *cs, s64 val) in update_relax_domain_level() argument
2233 if (val < -1 || val > sched_domain_level_max + 1) in update_relax_domain_level()
2234 return -EINVAL; in update_relax_domain_level()
2237 if (val != cs->relax_domain_level) { in update_relax_domain_level()
2238 cs->relax_domain_level = val; in update_relax_domain_level()
2239 if (!cpumask_empty(cs->cpus_allowed) && in update_relax_domain_level()
2240 is_sched_load_balance(cs)) in update_relax_domain_level()
2248 * update_tasks_flags - update the spread flags of tasks in the cpuset.
2249 * @cs: the cpuset in which each task's spread flags need to be changed
2251 * Iterate through each task of @cs updating its spread flags. As this
2255 static void update_tasks_flags(struct cpuset *cs) in update_tasks_flags() argument
2260 css_task_iter_start(&cs->css, 0, &it); in update_tasks_flags()
2262 cpuset_update_task_spread_flags(cs, task); in update_tasks_flags()
2267 * update_flag - read a 0 or a 1 in a file and update associated flag
2269 * cs: the cpuset to update
2275 static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, in update_flag() argument
2283 trialcs = alloc_trial_cpuset(cs); in update_flag()
2285 return -ENOMEM; in update_flag()
2288 set_bit(bit, &trialcs->flags); in update_flag()
2290 clear_bit(bit, &trialcs->flags); in update_flag()
2292 err = validate_change(cs, trialcs); in update_flag()
2294 goto out; in update_flag()
2296 balance_flag_changed = (is_sched_load_balance(cs) != in update_flag()
2299 spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs)) in update_flag()
2300 || (is_spread_page(cs) != is_spread_page(trialcs))); in update_flag()
2303 cs->flags = trialcs->flags; in update_flag()
2306 if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) in update_flag()
2310 update_tasks_flags(cs); in update_flag()
2311 out: in update_flag()
2317 * update_prstate - update partition_root_state
2318 * @cs: the cpuset to update
2324 static int update_prstate(struct cpuset *cs, int new_prs) in update_prstate() argument
2326 int err = PERR_NONE, old_prs = cs->partition_root_state; in update_prstate()
2327 struct cpuset *parent = parent_cs(cs); in update_prstate()
2338 cs->partition_root_state = -new_prs; in update_prstate()
2343 return -ENOMEM; in update_prstate()
2345 err = update_partition_exclusive(cs, new_prs); in update_prstate()
2347 goto out; in update_prstate()
2353 if (cpumask_empty(cs->cpus_allowed)) { in update_prstate()
2355 goto out; in update_prstate()
2358 err = update_parent_subparts_cpumask(cs, partcmd_enable, in update_prstate()
2370 update_parent_subparts_cpumask(cs, partcmd_disable, NULL, in update_prstate()
2376 if (unlikely(cs->nr_subparts_cpus)) { in update_prstate()
2378 cs->nr_subparts_cpus = 0; in update_prstate()
2379 cpumask_clear(cs->subparts_cpus); in update_prstate()
2380 compute_effective_cpumask(cs->effective_cpus, cs, parent); in update_prstate()
2384 out: in update_prstate()
2390 new_prs = -new_prs; in update_prstate()
2391 update_partition_exclusive(cs, new_prs); in update_prstate()
2395 cs->partition_root_state = new_prs; in update_prstate()
2396 WRITE_ONCE(cs->prs_err, err); in update_prstate()
2403 if (!list_empty(&cs->css.children)) in update_prstate()
2404 update_cpumasks_hier(cs, &tmpmask, !new_prs ? HIER_CHECKALL : 0); in update_prstate()
2407 update_partition_sd_lb(cs, old_prs); in update_prstate()
2409 notify_partition_change(cs, old_prs); in update_prstate()
2415 * Frequency meter - How fast is some event occurring?
2419 * fmeter_init() - initialize a frequency meter.
2420 * fmeter_markevent() - called each time the event happens.
2421 * fmeter_getrate() - returns the recent rate of such events.
2422 * fmeter_update() - internal routine used to update fmeter.
2429 * The filter is single-pole low-pass recursive (IIR). The time unit
2430 * is 1 second. Arithmetic is done using 32-bit integers scaled to
2434 * has a half-life of 10 seconds, meaning that if the events quit
2449 * per msec it maxes out at values just under 1,000,000. At constant
2459 #define FM_COEF 933 /* coefficient for half-life of 10 secs */
2467 fmp->cnt = 0; in fmeter_init()
2468 fmp->val = 0; in fmeter_init()
2469 fmp->time = 0; in fmeter_init()
2470 spin_lock_init(&fmp->lock); in fmeter_init()
2473 /* Internal meter update - process cnt events and update value */
2480 ticks = now - fmp->time; in fmeter_update()
2486 while (ticks-- > 0) in fmeter_update()
2487 fmp->val = (FM_COEF * fmp->val) / FM_SCALE; in fmeter_update()
2488 fmp->time = now; in fmeter_update()
2490 fmp->val += ((FM_SCALE - FM_COEF) * fmp->cnt) / FM_SCALE; in fmeter_update()
2491 fmp->cnt = 0; in fmeter_update()
2497 spin_lock(&fmp->lock); in fmeter_markevent()
2499 fmp->cnt = min(FM_MAXCNT, fmp->cnt + FM_SCALE); in fmeter_markevent()
2500 spin_unlock(&fmp->lock); in fmeter_markevent()
2508 spin_lock(&fmp->lock); in fmeter_getrate()
2510 val = fmp->val; in fmeter_getrate()
2511 spin_unlock(&fmp->lock); in fmeter_getrate()
2523 static int cpuset_can_attach_check(struct cpuset *cs) in cpuset_can_attach_check() argument
2525 if (cpumask_empty(cs->effective_cpus) || in cpuset_can_attach_check()
2526 (!is_in_v2_mode() && nodes_empty(cs->mems_allowed))) in cpuset_can_attach_check()
2527 return -ENOSPC; in cpuset_can_attach_check()
2531 static void reset_migrate_dl_data(struct cpuset *cs) in reset_migrate_dl_data() argument
2533 cs->nr_migrate_dl_tasks = 0; in reset_migrate_dl_data()
2534 cs->sum_migrate_dl_bw = 0; in reset_migrate_dl_data()
2541 struct cpuset *cs, *oldcs; in cpuset_can_attach() local
2549 cs = css_cs(css); in cpuset_can_attach()
2554 ret = cpuset_can_attach_check(cs); in cpuset_can_attach()
2558 cpus_updated = !cpumask_equal(cs->effective_cpus, oldcs->effective_cpus); in cpuset_can_attach()
2559 mems_updated = !nodes_equal(cs->effective_mems, oldcs->effective_mems); in cpuset_can_attach()
2579 cs->nr_migrate_dl_tasks++; in cpuset_can_attach()
2580 cs->sum_migrate_dl_bw += task->dl.dl_bw; in cpuset_can_attach()
2584 if (!cs->nr_migrate_dl_tasks) in cpuset_can_attach()
2587 if (!cpumask_intersects(oldcs->effective_cpus, cs->effective_cpus)) { in cpuset_can_attach()
2588 int cpu = cpumask_any_and(cpu_active_mask, cs->effective_cpus); in cpuset_can_attach()
2591 reset_migrate_dl_data(cs); in cpuset_can_attach()
2592 ret = -EINVAL; in cpuset_can_attach()
2596 ret = dl_bw_alloc(cpu, cs->sum_migrate_dl_bw); in cpuset_can_attach()
2598 reset_migrate_dl_data(cs); in cpuset_can_attach()
2608 cs->attach_in_progress++; in cpuset_can_attach()
2617 struct cpuset *cs; in cpuset_cancel_attach() local
2620 cs = css_cs(css); in cpuset_cancel_attach()
2623 cs->attach_in_progress--; in cpuset_cancel_attach()
2624 if (!cs->attach_in_progress) in cpuset_cancel_attach()
2627 if (cs->nr_migrate_dl_tasks) { in cpuset_cancel_attach()
2628 int cpu = cpumask_any(cs->effective_cpus); in cpuset_cancel_attach()
2630 dl_bw_free(cpu, cs->sum_migrate_dl_bw); in cpuset_cancel_attach()
2631 reset_migrate_dl_data(cs); in cpuset_cancel_attach()
2645 static void cpuset_attach_task(struct cpuset *cs, struct task_struct *task) in cpuset_attach_task() argument
2649 if (cs != &top_cpuset) in cpuset_attach_task()
2653 cs->subparts_cpus); in cpuset_attach_task()
2661 cpuset_update_task_spread_flags(cs, task); in cpuset_attach_task()
2669 struct cpuset *cs; in cpuset_attach() local
2674 cs = css_cs(css); in cpuset_attach()
2678 cpus_updated = !cpumask_equal(cs->effective_cpus, in cpuset_attach()
2679 oldcs->effective_cpus); in cpuset_attach()
2680 mems_updated = !nodes_equal(cs->effective_mems, oldcs->effective_mems); in cpuset_attach()
2685 * in effective cpus and mems. In that case, we can optimize out in cpuset_attach()
2690 cpuset_attach_nodemask_to = cs->effective_mems; in cpuset_attach()
2691 goto out; in cpuset_attach()
2694 guarantee_online_mems(cs, &cpuset_attach_nodemask_to); in cpuset_attach()
2697 cpuset_attach_task(cs, task); in cpuset_attach()
2705 cpuset_attach_nodemask_to = cs->effective_mems; in cpuset_attach()
2706 if (!is_memory_migrate(cs) && !mems_updated) in cpuset_attach()
2707 goto out; in cpuset_attach()
2723 if (is_memory_migrate(cs)) in cpuset_attach()
2724 cpuset_migrate_mm(mm, &oldcs->old_mems_allowed, in cpuset_attach()
2731 out: in cpuset_attach()
2732 cs->old_mems_allowed = cpuset_attach_nodemask_to; in cpuset_attach()
2734 if (cs->nr_migrate_dl_tasks) { in cpuset_attach()
2735 cs->nr_deadline_tasks += cs->nr_migrate_dl_tasks; in cpuset_attach()
2736 oldcs->nr_deadline_tasks -= cs->nr_migrate_dl_tasks; in cpuset_attach()
2737 reset_migrate_dl_data(cs); in cpuset_attach()
2740 cs->attach_in_progress--; in cpuset_attach()
2741 if (!cs->attach_in_progress) in cpuset_attach()
2771 struct cpuset *cs = css_cs(css); in cpuset_write_u64() local
2772 cpuset_filetype_t type = cft->private; in cpuset_write_u64()
2777 if (!is_cpuset_online(cs)) { in cpuset_write_u64()
2778 retval = -ENODEV; in cpuset_write_u64()
2784 retval = update_flag(CS_CPU_EXCLUSIVE, cs, val); in cpuset_write_u64()
2787 retval = update_flag(CS_MEM_EXCLUSIVE, cs, val); in cpuset_write_u64()
2790 retval = update_flag(CS_MEM_HARDWALL, cs, val); in cpuset_write_u64()
2793 retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, val); in cpuset_write_u64()
2796 retval = update_flag(CS_MEMORY_MIGRATE, cs, val); in cpuset_write_u64()
2802 retval = update_flag(CS_SPREAD_PAGE, cs, val); in cpuset_write_u64()
2805 retval = update_flag(CS_SPREAD_SLAB, cs, val); in cpuset_write_u64()
2808 retval = -EINVAL; in cpuset_write_u64()
2820 struct cpuset *cs = css_cs(css); in cpuset_write_s64() local
2821 cpuset_filetype_t type = cft->private; in cpuset_write_s64()
2822 int retval = -ENODEV; in cpuset_write_s64()
2826 if (!is_cpuset_online(cs)) in cpuset_write_s64()
2831 retval = update_relax_domain_level(cs, val); in cpuset_write_s64()
2834 retval = -EINVAL; in cpuset_write_s64()
2849 struct cpuset *cs = css_cs(of_css(of)); in cpuset_write_resmask() local
2851 int retval = -ENODEV; in cpuset_write_resmask()
2856 * CPU or memory hotunplug may leave @cs w/o any execution in cpuset_write_resmask()
2861 * As writes to "cpus" or "mems" may restore @cs's execution in cpuset_write_resmask()
2870 * protection is okay as we check whether @cs is online after in cpuset_write_resmask()
2874 css_get(&cs->css); in cpuset_write_resmask()
2875 kernfs_break_active_protection(of->kn); in cpuset_write_resmask()
2880 if (!is_cpuset_online(cs)) in cpuset_write_resmask()
2883 trialcs = alloc_trial_cpuset(cs); in cpuset_write_resmask()
2885 retval = -ENOMEM; in cpuset_write_resmask()
2889 switch (of_cft(of)->private) { in cpuset_write_resmask()
2891 retval = update_cpumask(cs, trialcs, buf); in cpuset_write_resmask()
2894 retval = update_nodemask(cs, trialcs, buf); in cpuset_write_resmask()
2897 retval = -EINVAL; in cpuset_write_resmask()
2905 kernfs_unbreak_active_protection(of->kn); in cpuset_write_resmask()
2906 css_put(&cs->css); in cpuset_write_resmask()
2921 struct cpuset *cs = css_cs(seq_css(sf)); in cpuset_common_seq_show() local
2922 cpuset_filetype_t type = seq_cft(sf)->private; in cpuset_common_seq_show()
2929 seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_allowed)); in cpuset_common_seq_show()
2932 seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->mems_allowed)); in cpuset_common_seq_show()
2935 seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->effective_cpus)); in cpuset_common_seq_show()
2938 seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->effective_mems)); in cpuset_common_seq_show()
2941 seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->subparts_cpus)); in cpuset_common_seq_show()
2944 ret = -EINVAL; in cpuset_common_seq_show()
2953 struct cpuset *cs = css_cs(css); in cpuset_read_u64() local
2954 cpuset_filetype_t type = cft->private; in cpuset_read_u64()
2957 return is_cpu_exclusive(cs); in cpuset_read_u64()
2959 return is_mem_exclusive(cs); in cpuset_read_u64()
2961 return is_mem_hardwall(cs); in cpuset_read_u64()
2963 return is_sched_load_balance(cs); in cpuset_read_u64()
2965 return is_memory_migrate(cs); in cpuset_read_u64()
2969 return fmeter_getrate(&cs->fmeter); in cpuset_read_u64()
2971 return is_spread_page(cs); in cpuset_read_u64()
2973 return is_spread_slab(cs); in cpuset_read_u64()
2984 struct cpuset *cs = css_cs(css); in cpuset_read_s64() local
2985 cpuset_filetype_t type = cft->private; in cpuset_read_s64()
2988 return cs->relax_domain_level; in cpuset_read_s64()
2999 struct cpuset *cs = css_cs(seq_css(seq)); in sched_partition_show() local
3002 switch (cs->partition_root_state) { in sched_partition_show()
3018 err = perr_strings[READ_ONCE(cs->prs_err)]; in sched_partition_show()
3031 struct cpuset *cs = css_cs(of_css(of)); in sched_partition_write() local
3033 int retval = -ENODEV; in sched_partition_write()
3047 return -EINVAL; in sched_partition_write()
3049 css_get(&cs->css); in sched_partition_write()
3052 if (!is_cpuset_online(cs)) in sched_partition_write()
3055 retval = update_prstate(cs, val); in sched_partition_write()
3059 css_put(&cs->css); in sched_partition_write()
3225 * cpuset_css_alloc - Allocate a cpuset css
3228 * Return: cpuset css on success, -ENOMEM on failure.
3230 * Allocate and initialize a new cpuset css, for non-NULL @parent_css, return
3236 struct cpuset *cs; in cpuset_css_alloc() local
3241 cs = kzalloc(sizeof(*cs), GFP_KERNEL); in cpuset_css_alloc()
3242 if (!cs) in cpuset_css_alloc()
3243 return ERR_PTR(-ENOMEM); in cpuset_css_alloc()
3245 if (alloc_cpumasks(cs, NULL)) { in cpuset_css_alloc()
3246 kfree(cs); in cpuset_css_alloc()
3247 return ERR_PTR(-ENOMEM); in cpuset_css_alloc()
3250 __set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); in cpuset_css_alloc()
3251 nodes_clear(cs->mems_allowed); in cpuset_css_alloc()
3252 nodes_clear(cs->effective_mems); in cpuset_css_alloc()
3253 fmeter_init(&cs->fmeter); in cpuset_css_alloc()
3254 cs->relax_domain_level = -1; in cpuset_css_alloc()
3258 __set_bit(CS_MEMORY_MIGRATE, &cs->flags); in cpuset_css_alloc()
3260 return &cs->css; in cpuset_css_alloc()
3265 struct cpuset *cs = css_cs(css); in cpuset_css_online() local
3266 struct cpuset *parent = parent_cs(cs); in cpuset_css_online()
3276 set_bit(CS_ONLINE, &cs->flags); in cpuset_css_online()
3278 set_bit(CS_SPREAD_PAGE, &cs->flags); in cpuset_css_online()
3280 set_bit(CS_SPREAD_SLAB, &cs->flags); in cpuset_css_online()
3286 cpumask_copy(cs->effective_cpus, parent->effective_cpus); in cpuset_css_online()
3287 cs->effective_mems = parent->effective_mems; in cpuset_css_online()
3288 cs->use_parent_ecpus = true; in cpuset_css_online()
3289 parent->child_ecpus_count++; in cpuset_css_online()
3297 clear_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); in cpuset_css_online()
3301 if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags)) in cpuset_css_online()
3307 * historical reasons - the flag may be specified during mount. in cpuset_css_online()
3310 * refuse to clone the configuration - thereby refusing the task to in cpuset_css_online()
3314 * changed to grant parent->cpus_allowed-sibling_cpus_exclusive in cpuset_css_online()
3327 cs->mems_allowed = parent->mems_allowed; in cpuset_css_online()
3328 cs->effective_mems = parent->mems_allowed; in cpuset_css_online()
3329 cpumask_copy(cs->cpus_allowed, parent->cpus_allowed); in cpuset_css_online()
3330 cpumask_copy(cs->effective_cpus, parent->cpus_allowed); in cpuset_css_online()
3351 struct cpuset *cs = css_cs(css); in cpuset_css_offline() local
3356 if (is_partition_valid(cs)) in cpuset_css_offline()
3357 update_prstate(cs, 0); in cpuset_css_offline()
3360 is_sched_load_balance(cs)) in cpuset_css_offline()
3361 update_flag(CS_SCHED_LOAD_BALANCE, cs, 0); in cpuset_css_offline()
3363 if (cs->use_parent_ecpus) { in cpuset_css_offline()
3364 struct cpuset *parent = parent_cs(cs); in cpuset_css_offline()
3366 cs->use_parent_ecpus = false; in cpuset_css_offline()
3367 parent->child_ecpus_count--; in cpuset_css_offline()
3371 clear_bit(CS_ONLINE, &cs->flags); in cpuset_css_offline()
3379 struct cpuset *cs = css_cs(css); in cpuset_css_free() local
3381 free_cpuset(cs); in cpuset_css_free()
3408 struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]); in cpuset_can_fork() local
3413 same_cs = (cs == task_cs(current)); in cpuset_can_fork()
3423 ret = cpuset_can_attach_check(cs); in cpuset_can_fork()
3439 cs->attach_in_progress++; in cpuset_can_fork()
3447 struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]); in cpuset_cancel_fork() local
3451 same_cs = (cs == task_cs(current)); in cpuset_cancel_fork()
3458 cs->attach_in_progress--; in cpuset_cancel_fork()
3459 if (!cs->attach_in_progress) in cpuset_cancel_fork()
3471 struct cpuset *cs; in cpuset_fork() local
3475 cs = task_cs(task); in cpuset_fork()
3476 same_cs = (cs == task_cs(current)); in cpuset_fork()
3480 if (cs == &top_cpuset) in cpuset_fork()
3483 set_cpus_allowed_ptr(task, current->cpus_ptr); in cpuset_fork()
3484 task->mems_allowed = current->mems_allowed; in cpuset_fork()
3490 guarantee_online_mems(cs, &cpuset_attach_nodemask_to); in cpuset_fork()
3491 cpuset_attach_task(cs, task); in cpuset_fork()
3493 cs->attach_in_progress--; in cpuset_fork()
3494 if (!cs->attach_in_progress) in cpuset_fork()
3520 * cpuset_init - initialize cpusets at system boot
3538 top_cpuset.relax_domain_level = -1; in cpuset_init()
3550 * cpuset to its next-highest non-empty parent.
3552 static void remove_tasks_in_empty_cpuset(struct cpuset *cs) in remove_tasks_in_empty_cpuset() argument
3557 * Find its next-highest non-empty parent, (top cpuset in remove_tasks_in_empty_cpuset()
3560 parent = parent_cs(cs); in remove_tasks_in_empty_cpuset()
3561 while (cpumask_empty(parent->cpus_allowed) || in remove_tasks_in_empty_cpuset()
3562 nodes_empty(parent->mems_allowed)) in remove_tasks_in_empty_cpuset()
3565 if (cgroup_transfer_tasks(parent->css.cgroup, cs->css.cgroup)) { in remove_tasks_in_empty_cpuset()
3566 pr_err("cpuset: failed to transfer tasks out of empty cpuset "); in remove_tasks_in_empty_cpuset()
3567 pr_cont_cgroup_name(cs->css.cgroup); in remove_tasks_in_empty_cpuset()
3573 hotplug_update_tasks_legacy(struct cpuset *cs, in hotplug_update_tasks_legacy() argument
3580 cpumask_copy(cs->cpus_allowed, new_cpus); in hotplug_update_tasks_legacy()
3581 cpumask_copy(cs->effective_cpus, new_cpus); in hotplug_update_tasks_legacy()
3582 cs->mems_allowed = *new_mems; in hotplug_update_tasks_legacy()
3583 cs->effective_mems = *new_mems; in hotplug_update_tasks_legacy()
3590 if (cpus_updated && !cpumask_empty(cs->cpus_allowed)) in hotplug_update_tasks_legacy()
3591 update_tasks_cpumask(cs, new_cpus); in hotplug_update_tasks_legacy()
3592 if (mems_updated && !nodes_empty(cs->mems_allowed)) in hotplug_update_tasks_legacy()
3593 update_tasks_nodemask(cs); in hotplug_update_tasks_legacy()
3595 is_empty = cpumask_empty(cs->cpus_allowed) || in hotplug_update_tasks_legacy()
3596 nodes_empty(cs->mems_allowed); in hotplug_update_tasks_legacy()
3605 remove_tasks_in_empty_cpuset(cs); in hotplug_update_tasks_legacy()
3611 hotplug_update_tasks(struct cpuset *cs, in hotplug_update_tasks() argument
3616 if (cpumask_empty(new_cpus) && !is_partition_valid(cs)) in hotplug_update_tasks()
3617 cpumask_copy(new_cpus, parent_cs(cs)->effective_cpus); in hotplug_update_tasks()
3619 *new_mems = parent_cs(cs)->effective_mems; in hotplug_update_tasks()
3622 cpumask_copy(cs->effective_cpus, new_cpus); in hotplug_update_tasks()
3623 cs->effective_mems = *new_mems; in hotplug_update_tasks()
3627 update_tasks_cpumask(cs, new_cpus); in hotplug_update_tasks()
3629 update_tasks_nodemask(cs); in hotplug_update_tasks()
3640 * cpuset_hotplug_update_tasks - update tasks in a cpuset for hotunplug
3641 * @cs: cpuset of interest
3644 * Compare @cs's cpu and mem masks against top_cpuset and if some have gone
3645 * offline, update @cs accordingly. If @cs ends up with no CPU or memory,
3648 static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp) in cpuset_hotplug_update_tasks() argument
3656 wait_event(cpuset_attach_wq, cs->attach_in_progress == 0); in cpuset_hotplug_update_tasks()
3664 if (cs->attach_in_progress) { in cpuset_hotplug_update_tasks()
3669 parent = parent_cs(cs); in cpuset_hotplug_update_tasks()
3670 compute_effective_cpumask(&new_cpus, cs, parent); in cpuset_hotplug_update_tasks()
3671 nodes_and(new_mems, cs->mems_allowed, parent->effective_mems); in cpuset_hotplug_update_tasks()
3673 if (cs->nr_subparts_cpus) in cpuset_hotplug_update_tasks()
3678 cpumask_andnot(&new_cpus, &new_cpus, cs->subparts_cpus); in cpuset_hotplug_update_tasks()
3680 if (!tmp || !cs->partition_root_state) in cpuset_hotplug_update_tasks()
3689 if (cs->nr_subparts_cpus && is_partition_valid(cs) && in cpuset_hotplug_update_tasks()
3690 cpumask_empty(&new_cpus) && partition_is_populated(cs, NULL)) { in cpuset_hotplug_update_tasks()
3692 cs->nr_subparts_cpus = 0; in cpuset_hotplug_update_tasks()
3693 cpumask_clear(cs->subparts_cpus); in cpuset_hotplug_update_tasks()
3695 compute_effective_cpumask(&new_cpus, cs, parent); in cpuset_hotplug_update_tasks()
3705 if (is_partition_valid(cs) && (!parent->nr_subparts_cpus || in cpuset_hotplug_update_tasks()
3706 (cpumask_empty(&new_cpus) && partition_is_populated(cs, NULL)))) { in cpuset_hotplug_update_tasks()
3709 update_parent_subparts_cpumask(cs, partcmd_disable, NULL, tmp); in cpuset_hotplug_update_tasks()
3710 if (cs->nr_subparts_cpus) { in cpuset_hotplug_update_tasks()
3712 cs->nr_subparts_cpus = 0; in cpuset_hotplug_update_tasks()
3713 cpumask_clear(cs->subparts_cpus); in cpuset_hotplug_update_tasks()
3715 compute_effective_cpumask(&new_cpus, cs, parent); in cpuset_hotplug_update_tasks()
3718 old_prs = cs->partition_root_state; in cpuset_hotplug_update_tasks()
3719 parent_prs = parent->partition_root_state; in cpuset_hotplug_update_tasks()
3720 if (is_partition_valid(cs)) { in cpuset_hotplug_update_tasks()
3722 make_partition_invalid(cs); in cpuset_hotplug_update_tasks()
3725 WRITE_ONCE(cs->prs_err, PERR_INVPARENT); in cpuset_hotplug_update_tasks()
3727 WRITE_ONCE(cs->prs_err, PERR_NOTPART); in cpuset_hotplug_update_tasks()
3729 WRITE_ONCE(cs->prs_err, PERR_HOTPLUG); in cpuset_hotplug_update_tasks()
3730 notify_partition_change(cs, old_prs); in cpuset_hotplug_update_tasks()
3739 else if (is_partition_valid(parent) && is_partition_invalid(cs)) { in cpuset_hotplug_update_tasks()
3740 update_parent_subparts_cpumask(cs, partcmd_update, NULL, tmp); in cpuset_hotplug_update_tasks()
3741 if (is_partition_valid(cs)) in cpuset_hotplug_update_tasks()
3746 cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus); in cpuset_hotplug_update_tasks()
3747 mems_updated = !nodes_equal(new_mems, cs->effective_mems); in cpuset_hotplug_update_tasks()
3755 hotplug_update_tasks(cs, &new_cpus, &new_mems, in cpuset_hotplug_update_tasks()
3758 hotplug_update_tasks_legacy(cs, &new_cpus, &new_mems, in cpuset_hotplug_update_tasks()
3766 * cpuset_hotplug_workfn - handle CPU/memory hotunplug for a cpuset
3775 * Non-root cpusets are only affected by offlining. If any CPUs or memory
3795 /* fetch the available cpus/mems and find out which changed how */ in cpuset_hotplug_workfn()
3854 struct cpuset *cs; in cpuset_hotplug_workfn() local
3858 cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) { in cpuset_hotplug_workfn()
3859 if (cs == &top_cpuset || !css_tryget_online(&cs->css)) in cpuset_hotplug_workfn()
3863 cpuset_hotplug_update_tasks(cs, ptmp); in cpuset_hotplug_workfn()
3866 css_put(&cs->css); in cpuset_hotplug_workfn()
3908 * cpuset_init_smp - initialize cpus_allowed
3931 * cpuset_cpus_allowed - return cpus_allowed mask from a task's cpuset.
3932 * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
3936 * attached to the specified @tsk. Guaranteed to return some non-empty
3944 struct cpuset *cs; in cpuset_cpus_allowed() local
3949 cs = task_cs(tsk); in cpuset_cpus_allowed()
3950 if (cs != &top_cpuset) in cpuset_cpus_allowed()
3957 if ((cs == &top_cpuset) || cpumask_empty(pmask)) { in cpuset_cpus_allowed()
3974 * cpuset_cpus_allowed_fallback - final fallback before complete catastrophe.
3978 * tsk->cpus_allowed, we fall back to task_cs(tsk)->cpus_allowed. In legacy
3979 * mode however, this value is the same as task_cs(tsk)->effective_cpus,
3994 cs_mask = task_cs(tsk)->cpus_allowed; in cpuset_cpus_allowed_fallback()
4002 * We own tsk->cpus_allowed, nobody can change it under us. in cpuset_cpus_allowed_fallback()
4004 * But we used cs && cs->cpus_allowed lockless and thus can in cpuset_cpus_allowed_fallback()
4006 * the wrong tsk->cpus_allowed. However, both cases imply the in cpuset_cpus_allowed_fallback()
4007 * subsequent cpuset_change_cpumask()->set_cpus_allowed_ptr() in cpuset_cpus_allowed_fallback()
4011 * changes in tsk_cs()->cpus_allowed. Otherwise we can temporarily in cpuset_cpus_allowed_fallback()
4023 nodes_setall(current->mems_allowed); in cpuset_init_current_mems_allowed()
4027 * cpuset_mems_allowed - return mems_allowed mask from a task's cpuset.
4028 * @tsk: pointer to task_struct from which to obtain cpuset->mems_allowed.
4031 * attached to the specified @tsk. Guaranteed to return some non-empty
4051 * cpuset_nodemask_valid_mems_allowed - check nodemask vs. current mems_allowed
4054 * Are any of the nodes in the nodemask allowed in current->mems_allowed?
4058 return nodes_intersects(*nodemask, current->mems_allowed); in cpuset_nodemask_valid_mems_allowed()
4062 * nearest_hardwall_ancestor() - Returns the nearest mem_exclusive or
4067 static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs) in nearest_hardwall_ancestor() argument
4069 while (!(is_mem_exclusive(cs) || is_mem_hardwall(cs)) && parent_cs(cs)) in nearest_hardwall_ancestor()
4070 cs = parent_cs(cs); in nearest_hardwall_ancestor()
4071 return cs; in nearest_hardwall_ancestor()
4075 * cpuset_node_allowed - Can we allocate on a memory node?
4108 * in_interrupt - any node ok (current task context irrelevant)
4109 * GFP_ATOMIC - any node ok
4110 * tsk_is_oom_victim - any node ok
4111 * GFP_KERNEL - any node in enclosing hardwalled cpuset ok
4112 * GFP_USER - only nodes in current task's mems_allowed ok.
4116 struct cpuset *cs; /* current cpuset ancestors */ in cpuset_node_allowed() local
4122 if (node_isset(node, current->mems_allowed)) in cpuset_node_allowed()
4133 if (current->flags & PF_EXITING) /* Let dying task have memory */ in cpuset_node_allowed()
4140 cs = nearest_hardwall_ancestor(task_cs(current)); in cpuset_node_allowed()
4141 allowed = node_isset(node, cs->mems_allowed); in cpuset_node_allowed()
4149 * cpuset_spread_node() - On which node to begin search for a page
4165 * only set nodes in task->mems_allowed that are online. So it
4176 return *rotor = next_node_in(*rotor, current->mems_allowed); in cpuset_spread_node()
4180 * cpuset_mem_spread_node() - On which node to begin search for a file page
4184 if (current->cpuset_mem_spread_rotor == NUMA_NO_NODE) in cpuset_mem_spread_node()
4185 current->cpuset_mem_spread_rotor = in cpuset_mem_spread_node()
4186 node_random(&current->mems_allowed); in cpuset_mem_spread_node()
4188 return cpuset_spread_node(&current->cpuset_mem_spread_rotor); in cpuset_mem_spread_node()
4192 * cpuset_slab_spread_node() - On which node to begin search for a slab page
4196 if (current->cpuset_slab_spread_rotor == NUMA_NO_NODE) in cpuset_slab_spread_node()
4197 current->cpuset_slab_spread_rotor = in cpuset_slab_spread_node()
4198 node_random(&current->mems_allowed); in cpuset_slab_spread_node()
4200 return cpuset_spread_node(&current->cpuset_slab_spread_rotor); in cpuset_slab_spread_node()
4205 * cpuset_mems_allowed_intersects - Does @tsk1's mems_allowed intersect @tsk2's?
4218 return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed); in cpuset_mems_allowed_intersects()
4222 * cpuset_print_current_mems_allowed - prints current's cpuset and mems_allowed
4233 cgrp = task_cs(current)->css.cgroup; in cpuset_print_current_mems_allowed()
4237 nodemask_pr_args(&current->mems_allowed)); in cpuset_print_current_mems_allowed()
4251 * __cpuset_memory_pressure_bump - keep stats of per-cpuset reclaims.
4262 * Display to user space in the per-cpuset read-only file
4271 fmeter_markevent(&task_cs(current)->fmeter); in __cpuset_memory_pressure_bump()
4278 * - Print task's cpuset path into seq_file.
4279 * - Used for /proc/<pid>/cpuset.
4280 * - No need to task_lock(tsk) on this tsk->cpuset reference, as it
4281 * doesn't really matter if tsk->cpuset changes after we read it,
4292 retval = -ENOMEM; in proc_cpuset_show()
4295 goto out; in proc_cpuset_show()
4300 retval = cgroup_path_ns_locked(css->cgroup, buf, PATH_MAX, in proc_cpuset_show()
4301 current->nsproxy->cgroup_ns); in proc_cpuset_show()
4305 if (retval == -E2BIG) in proc_cpuset_show()
4306 retval = -ENAMETOOLONG; in proc_cpuset_show()
4314 out: in proc_cpuset_show()
4323 nodemask_pr_args(&task->mems_allowed)); in cpuset_task_status_allowed()
4325 nodemask_pr_args(&task->mems_allowed)); in cpuset_task_status_allowed()