1 //SPDX-License-Identifier: GPL-2.0 2 #include <linux/cgroup.h> 3 #include <linux/sched.h> 4 #include <linux/sched/task.h> 5 #include <linux/sched/signal.h> 6 7 #include "cgroup-internal.h" 8 9 /* 10 * Propagate the cgroup frozen state upwards by the cgroup tree. 11 */ 12 static void cgroup_propagate_frozen(struct cgroup *cgrp, bool frozen) 13 { 14 int desc = 1; 15 16 /* 17 * If the new state is frozen, some freezing ancestor cgroups may change 18 * their state too, depending on if all their descendants are frozen. 19 * 20 * Otherwise, all ancestor cgroups are forced into the non-frozen state. 21 */ 22 while ((cgrp = cgroup_parent(cgrp))) { 23 if (frozen) { 24 cgrp->freezer.nr_frozen_descendants += desc; 25 if (!test_bit(CGRP_FROZEN, &cgrp->flags) && 26 test_bit(CGRP_FREEZE, &cgrp->flags) && 27 cgrp->freezer.nr_frozen_descendants == 28 cgrp->nr_descendants) { 29 set_bit(CGRP_FROZEN, &cgrp->flags); 30 cgroup_file_notify(&cgrp->events_file); 31 desc++; 32 } 33 } else { 34 cgrp->freezer.nr_frozen_descendants -= desc; 35 if (test_bit(CGRP_FROZEN, &cgrp->flags)) { 36 clear_bit(CGRP_FROZEN, &cgrp->flags); 37 cgroup_file_notify(&cgrp->events_file); 38 desc++; 39 } 40 } 41 } 42 } 43 44 /* 45 * Revisit the cgroup frozen state. 46 * Checks if the cgroup is really frozen and perform all state transitions. 47 */ 48 void cgroup_update_frozen(struct cgroup *cgrp) 49 { 50 bool frozen; 51 52 lockdep_assert_held(&css_set_lock); 53 54 /* 55 * If the cgroup has to be frozen (CGRP_FREEZE bit set), 56 * and all tasks are frozen and/or stopped, let's consider 57 * the cgroup frozen. Otherwise it's not frozen. 58 */ 59 frozen = test_bit(CGRP_FREEZE, &cgrp->flags) && 60 cgrp->freezer.nr_frozen_tasks == __cgroup_task_count(cgrp); 61 62 if (frozen) { 63 /* Already there? */ 64 if (test_bit(CGRP_FROZEN, &cgrp->flags)) 65 return; 66 67 set_bit(CGRP_FROZEN, &cgrp->flags); 68 } else { 69 /* Already there? */ 70 if (!test_bit(CGRP_FROZEN, &cgrp->flags)) 71 return; 72 73 clear_bit(CGRP_FROZEN, &cgrp->flags); 74 } 75 cgroup_file_notify(&cgrp->events_file); 76 77 /* Update the state of ancestor cgroups. */ 78 cgroup_propagate_frozen(cgrp, frozen); 79 } 80 81 /* 82 * Increment cgroup's nr_frozen_tasks. 83 */ 84 static void cgroup_inc_frozen_cnt(struct cgroup *cgrp) 85 { 86 cgrp->freezer.nr_frozen_tasks++; 87 } 88 89 /* 90 * Decrement cgroup's nr_frozen_tasks. 91 */ 92 static void cgroup_dec_frozen_cnt(struct cgroup *cgrp) 93 { 94 cgrp->freezer.nr_frozen_tasks--; 95 WARN_ON_ONCE(cgrp->freezer.nr_frozen_tasks < 0); 96 } 97 98 /* 99 * Enter frozen/stopped state, if not yet there. Update cgroup's counters, 100 * and revisit the state of the cgroup, if necessary. 101 */ 102 void cgroup_enter_frozen(void) 103 { 104 struct cgroup *cgrp; 105 106 if (current->frozen) 107 return; 108 109 spin_lock_irq(&css_set_lock); 110 current->frozen = true; 111 cgrp = task_dfl_cgroup(current); 112 cgroup_inc_frozen_cnt(cgrp); 113 cgroup_update_frozen(cgrp); 114 spin_unlock_irq(&css_set_lock); 115 } 116 117 /* 118 * Conditionally leave frozen/stopped state. Update cgroup's counters, 119 * and revisit the state of the cgroup, if necessary. 120 * 121 * If always_leave is not set, and the cgroup is freezing, 122 * we're racing with the cgroup freezing. In this case, we don't 123 * drop the frozen counter to avoid a transient switch to 124 * the unfrozen state. 125 */ 126 void cgroup_leave_frozen(bool always_leave) 127 { 128 struct cgroup *cgrp; 129 130 spin_lock_irq(&css_set_lock); 131 cgrp = task_dfl_cgroup(current); 132 if (always_leave || !test_bit(CGRP_FREEZE, &cgrp->flags)) { 133 cgroup_dec_frozen_cnt(cgrp); 134 cgroup_update_frozen(cgrp); 135 WARN_ON_ONCE(!current->frozen); 136 current->frozen = false; 137 } 138 spin_unlock_irq(&css_set_lock); 139 140 if (unlikely(current->frozen)) { 141 /* 142 * If the task remained in the frozen state, 143 * make sure it won't reach userspace without 144 * entering the signal handling loop. 145 */ 146 spin_lock_irq(¤t->sighand->siglock); 147 recalc_sigpending(); 148 spin_unlock_irq(¤t->sighand->siglock); 149 } 150 } 151 152 /* 153 * Freeze or unfreeze the task by setting or clearing the JOBCTL_TRAP_FREEZE 154 * jobctl bit. 155 */ 156 static void cgroup_freeze_task(struct task_struct *task, bool freeze) 157 { 158 unsigned long flags; 159 160 /* If the task is about to die, don't bother with freezing it. */ 161 if (!lock_task_sighand(task, &flags)) 162 return; 163 164 if (freeze) { 165 task->jobctl |= JOBCTL_TRAP_FREEZE; 166 signal_wake_up(task, false); 167 } else { 168 task->jobctl &= ~JOBCTL_TRAP_FREEZE; 169 wake_up_process(task); 170 } 171 172 unlock_task_sighand(task, &flags); 173 } 174 175 /* 176 * Freeze or unfreeze all tasks in the given cgroup. 177 */ 178 static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze) 179 { 180 struct css_task_iter it; 181 struct task_struct *task; 182 183 lockdep_assert_held(&cgroup_mutex); 184 185 spin_lock_irq(&css_set_lock); 186 if (freeze) 187 set_bit(CGRP_FREEZE, &cgrp->flags); 188 else 189 clear_bit(CGRP_FREEZE, &cgrp->flags); 190 spin_unlock_irq(&css_set_lock); 191 192 css_task_iter_start(&cgrp->self, 0, &it); 193 while ((task = css_task_iter_next(&it))) { 194 /* 195 * Ignore kernel threads here. Freezing cgroups containing 196 * kthreads isn't supported. 197 */ 198 if (task->flags & PF_KTHREAD) 199 continue; 200 cgroup_freeze_task(task, freeze); 201 } 202 css_task_iter_end(&it); 203 204 /* 205 * Cgroup state should be revisited here to cover empty leaf cgroups 206 * and cgroups which descendants are already in the desired state. 207 */ 208 spin_lock_irq(&css_set_lock); 209 if (cgrp->nr_descendants == cgrp->freezer.nr_frozen_descendants) 210 cgroup_update_frozen(cgrp); 211 spin_unlock_irq(&css_set_lock); 212 } 213 214 /* 215 * Adjust the task state (freeze or unfreeze) and revisit the state of 216 * source and destination cgroups. 217 */ 218 void cgroup_freezer_migrate_task(struct task_struct *task, 219 struct cgroup *src, struct cgroup *dst) 220 { 221 lockdep_assert_held(&css_set_lock); 222 223 /* 224 * Kernel threads are not supposed to be frozen at all. 225 */ 226 if (task->flags & PF_KTHREAD) 227 return; 228 229 /* 230 * Adjust counters of freezing and frozen tasks. 231 * Note, that if the task is frozen, but the destination cgroup is not 232 * frozen, we bump both counters to keep them balanced. 233 */ 234 if (task->frozen) { 235 cgroup_inc_frozen_cnt(dst); 236 cgroup_dec_frozen_cnt(src); 237 } 238 cgroup_update_frozen(dst); 239 cgroup_update_frozen(src); 240 241 /* 242 * Force the task to the desired state. 243 */ 244 cgroup_freeze_task(task, test_bit(CGRP_FREEZE, &dst->flags)); 245 } 246 247 void cgroup_freezer_frozen_exit(struct task_struct *task) 248 { 249 struct cgroup *cgrp = task_dfl_cgroup(task); 250 251 lockdep_assert_held(&css_set_lock); 252 253 cgroup_dec_frozen_cnt(cgrp); 254 cgroup_update_frozen(cgrp); 255 } 256 257 void cgroup_freeze(struct cgroup *cgrp, bool freeze) 258 { 259 struct cgroup_subsys_state *css; 260 struct cgroup *dsct; 261 bool applied = false; 262 263 lockdep_assert_held(&cgroup_mutex); 264 265 /* 266 * Nothing changed? Just exit. 267 */ 268 if (cgrp->freezer.freeze == freeze) 269 return; 270 271 cgrp->freezer.freeze = freeze; 272 273 /* 274 * Propagate changes downwards the cgroup tree. 275 */ 276 css_for_each_descendant_pre(css, &cgrp->self) { 277 dsct = css->cgroup; 278 279 if (cgroup_is_dead(dsct)) 280 continue; 281 282 if (freeze) { 283 dsct->freezer.e_freeze++; 284 /* 285 * Already frozen because of ancestor's settings? 286 */ 287 if (dsct->freezer.e_freeze > 1) 288 continue; 289 } else { 290 dsct->freezer.e_freeze--; 291 /* 292 * Still frozen because of ancestor's settings? 293 */ 294 if (dsct->freezer.e_freeze > 0) 295 continue; 296 297 WARN_ON_ONCE(dsct->freezer.e_freeze < 0); 298 } 299 300 /* 301 * Do change actual state: freeze or unfreeze. 302 */ 303 cgroup_do_freeze(dsct, freeze); 304 applied = true; 305 } 306 307 /* 308 * Even if the actual state hasn't changed, let's notify a user. 309 * The state can be enforced by an ancestor cgroup: the cgroup 310 * can already be in the desired state or it can be locked in the 311 * opposite state, so that the transition will never happen. 312 * In both cases it's better to notify a user, that there is 313 * nothing to wait for. 314 */ 315 if (!applied) 316 cgroup_file_notify(&cgrp->events_file); 317 } 318