xref: /openbmc/linux/kernel/cgroup/freezer.c (revision 76f969e8948d82e78e1bc4beb6b9465908e74873)
// SPDX-License-Identifier: GPL-2.0
2 #include <linux/cgroup.h>
3 #include <linux/sched.h>
4 #include <linux/sched/task.h>
5 #include <linux/sched/signal.h>
6 
7 #include "cgroup-internal.h"
8 
9 /*
10  * Propagate the cgroup frozen state upwards by the cgroup tree.
11  */
12 static void cgroup_propagate_frozen(struct cgroup *cgrp, bool frozen)
13 {
14 	int desc = 1;
15 
16 	/*
17 	 * If the new state is frozen, some freezing ancestor cgroups may change
18 	 * their state too, depending on if all their descendants are frozen.
19 	 *
20 	 * Otherwise, all ancestor cgroups are forced into the non-frozen state.
21 	 */
22 	while ((cgrp = cgroup_parent(cgrp))) {
23 		if (frozen) {
24 			cgrp->freezer.nr_frozen_descendants += desc;
25 			if (!test_bit(CGRP_FROZEN, &cgrp->flags) &&
26 			    test_bit(CGRP_FREEZE, &cgrp->flags) &&
27 			    cgrp->freezer.nr_frozen_descendants ==
28 			    cgrp->nr_descendants) {
29 				set_bit(CGRP_FROZEN, &cgrp->flags);
30 				cgroup_file_notify(&cgrp->events_file);
31 				desc++;
32 			}
33 		} else {
34 			cgrp->freezer.nr_frozen_descendants -= desc;
35 			if (test_bit(CGRP_FROZEN, &cgrp->flags)) {
36 				clear_bit(CGRP_FROZEN, &cgrp->flags);
37 				cgroup_file_notify(&cgrp->events_file);
38 				desc++;
39 			}
40 		}
41 	}
42 }
43 
44 /*
45  * Revisit the cgroup frozen state.
46  * Checks if the cgroup is really frozen and perform all state transitions.
47  */
48 void cgroup_update_frozen(struct cgroup *cgrp)
49 {
50 	bool frozen;
51 
52 	lockdep_assert_held(&css_set_lock);
53 
54 	/*
55 	 * If the cgroup has to be frozen (CGRP_FREEZE bit set),
56 	 * and all tasks are frozen and/or stopped, let's consider
57 	 * the cgroup frozen. Otherwise it's not frozen.
58 	 */
59 	frozen = test_bit(CGRP_FREEZE, &cgrp->flags) &&
60 		cgrp->freezer.nr_frozen_tasks == __cgroup_task_count(cgrp);
61 
62 	if (frozen) {
63 		/* Already there? */
64 		if (test_bit(CGRP_FROZEN, &cgrp->flags))
65 			return;
66 
67 		set_bit(CGRP_FROZEN, &cgrp->flags);
68 	} else {
69 		/* Already there? */
70 		if (!test_bit(CGRP_FROZEN, &cgrp->flags))
71 			return;
72 
73 		clear_bit(CGRP_FROZEN, &cgrp->flags);
74 	}
75 	cgroup_file_notify(&cgrp->events_file);
76 
77 	/* Update the state of ancestor cgroups. */
78 	cgroup_propagate_frozen(cgrp, frozen);
79 }
80 
81 /*
82  * Increment cgroup's nr_frozen_tasks.
83  */
84 static void cgroup_inc_frozen_cnt(struct cgroup *cgrp)
85 {
86 	cgrp->freezer.nr_frozen_tasks++;
87 }
88 
89 /*
90  * Decrement cgroup's nr_frozen_tasks.
91  */
92 static void cgroup_dec_frozen_cnt(struct cgroup *cgrp)
93 {
94 	cgrp->freezer.nr_frozen_tasks--;
95 	WARN_ON_ONCE(cgrp->freezer.nr_frozen_tasks < 0);
96 }
97 
98 /*
99  * Enter frozen/stopped state, if not yet there. Update cgroup's counters,
100  * and revisit the state of the cgroup, if necessary.
101  */
102 void cgroup_enter_frozen(void)
103 {
104 	struct cgroup *cgrp;
105 
106 	if (current->frozen)
107 		return;
108 
109 	spin_lock_irq(&css_set_lock);
110 	current->frozen = true;
111 	cgrp = task_dfl_cgroup(current);
112 	cgroup_inc_frozen_cnt(cgrp);
113 	cgroup_update_frozen(cgrp);
114 	spin_unlock_irq(&css_set_lock);
115 }
116 
117 /*
118  * Conditionally leave frozen/stopped state. Update cgroup's counters,
119  * and revisit the state of the cgroup, if necessary.
120  *
121  * If always_leave is not set, and the cgroup is freezing,
122  * we're racing with the cgroup freezing. In this case, we don't
123  * drop the frozen counter to avoid a transient switch to
124  * the unfrozen state.
125  */
126 void cgroup_leave_frozen(bool always_leave)
127 {
128 	struct cgroup *cgrp;
129 
130 	spin_lock_irq(&css_set_lock);
131 	cgrp = task_dfl_cgroup(current);
132 	if (always_leave || !test_bit(CGRP_FREEZE, &cgrp->flags)) {
133 		cgroup_dec_frozen_cnt(cgrp);
134 		cgroup_update_frozen(cgrp);
135 		WARN_ON_ONCE(!current->frozen);
136 		current->frozen = false;
137 	}
138 	spin_unlock_irq(&css_set_lock);
139 
140 	if (unlikely(current->frozen)) {
141 		/*
142 		 * If the task remained in the frozen state,
143 		 * make sure it won't reach userspace without
144 		 * entering the signal handling loop.
145 		 */
146 		spin_lock_irq(&current->sighand->siglock);
147 		recalc_sigpending();
148 		spin_unlock_irq(&current->sighand->siglock);
149 	}
150 }
151 
152 /*
153  * Freeze or unfreeze the task by setting or clearing the JOBCTL_TRAP_FREEZE
154  * jobctl bit.
155  */
156 static void cgroup_freeze_task(struct task_struct *task, bool freeze)
157 {
158 	unsigned long flags;
159 
160 	/* If the task is about to die, don't bother with freezing it. */
161 	if (!lock_task_sighand(task, &flags))
162 		return;
163 
164 	if (freeze) {
165 		task->jobctl |= JOBCTL_TRAP_FREEZE;
166 		signal_wake_up(task, false);
167 	} else {
168 		task->jobctl &= ~JOBCTL_TRAP_FREEZE;
169 		wake_up_process(task);
170 	}
171 
172 	unlock_task_sighand(task, &flags);
173 }
174 
175 /*
176  * Freeze or unfreeze all tasks in the given cgroup.
177  */
178 static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze)
179 {
180 	struct css_task_iter it;
181 	struct task_struct *task;
182 
183 	lockdep_assert_held(&cgroup_mutex);
184 
185 	spin_lock_irq(&css_set_lock);
186 	if (freeze)
187 		set_bit(CGRP_FREEZE, &cgrp->flags);
188 	else
189 		clear_bit(CGRP_FREEZE, &cgrp->flags);
190 	spin_unlock_irq(&css_set_lock);
191 
192 	css_task_iter_start(&cgrp->self, 0, &it);
193 	while ((task = css_task_iter_next(&it))) {
194 		/*
195 		 * Ignore kernel threads here. Freezing cgroups containing
196 		 * kthreads isn't supported.
197 		 */
198 		if (task->flags & PF_KTHREAD)
199 			continue;
200 		cgroup_freeze_task(task, freeze);
201 	}
202 	css_task_iter_end(&it);
203 
204 	/*
205 	 * Cgroup state should be revisited here to cover empty leaf cgroups
206 	 * and cgroups which descendants are already in the desired state.
207 	 */
208 	spin_lock_irq(&css_set_lock);
209 	if (cgrp->nr_descendants == cgrp->freezer.nr_frozen_descendants)
210 		cgroup_update_frozen(cgrp);
211 	spin_unlock_irq(&css_set_lock);
212 }
213 
214 /*
215  * Adjust the task state (freeze or unfreeze) and revisit the state of
216  * source and destination cgroups.
217  */
218 void cgroup_freezer_migrate_task(struct task_struct *task,
219 				 struct cgroup *src, struct cgroup *dst)
220 {
221 	lockdep_assert_held(&css_set_lock);
222 
223 	/*
224 	 * Kernel threads are not supposed to be frozen at all.
225 	 */
226 	if (task->flags & PF_KTHREAD)
227 		return;
228 
229 	/*
230 	 * Adjust counters of freezing and frozen tasks.
231 	 * Note, that if the task is frozen, but the destination cgroup is not
232 	 * frozen, we bump both counters to keep them balanced.
233 	 */
234 	if (task->frozen) {
235 		cgroup_inc_frozen_cnt(dst);
236 		cgroup_dec_frozen_cnt(src);
237 	}
238 	cgroup_update_frozen(dst);
239 	cgroup_update_frozen(src);
240 
241 	/*
242 	 * Force the task to the desired state.
243 	 */
244 	cgroup_freeze_task(task, test_bit(CGRP_FREEZE, &dst->flags));
245 }
246 
247 void cgroup_freezer_frozen_exit(struct task_struct *task)
248 {
249 	struct cgroup *cgrp = task_dfl_cgroup(task);
250 
251 	lockdep_assert_held(&css_set_lock);
252 
253 	cgroup_dec_frozen_cnt(cgrp);
254 	cgroup_update_frozen(cgrp);
255 }
256 
257 void cgroup_freeze(struct cgroup *cgrp, bool freeze)
258 {
259 	struct cgroup_subsys_state *css;
260 	struct cgroup *dsct;
261 	bool applied = false;
262 
263 	lockdep_assert_held(&cgroup_mutex);
264 
265 	/*
266 	 * Nothing changed? Just exit.
267 	 */
268 	if (cgrp->freezer.freeze == freeze)
269 		return;
270 
271 	cgrp->freezer.freeze = freeze;
272 
273 	/*
274 	 * Propagate changes downwards the cgroup tree.
275 	 */
276 	css_for_each_descendant_pre(css, &cgrp->self) {
277 		dsct = css->cgroup;
278 
279 		if (cgroup_is_dead(dsct))
280 			continue;
281 
282 		if (freeze) {
283 			dsct->freezer.e_freeze++;
284 			/*
285 			 * Already frozen because of ancestor's settings?
286 			 */
287 			if (dsct->freezer.e_freeze > 1)
288 				continue;
289 		} else {
290 			dsct->freezer.e_freeze--;
291 			/*
292 			 * Still frozen because of ancestor's settings?
293 			 */
294 			if (dsct->freezer.e_freeze > 0)
295 				continue;
296 
297 			WARN_ON_ONCE(dsct->freezer.e_freeze < 0);
298 		}
299 
300 		/*
301 		 * Do change actual state: freeze or unfreeze.
302 		 */
303 		cgroup_do_freeze(dsct, freeze);
304 		applied = true;
305 	}
306 
307 	/*
308 	 * Even if the actual state hasn't changed, let's notify a user.
309 	 * The state can be enforced by an ancestor cgroup: the cgroup
310 	 * can already be in the desired state or it can be locked in the
311 	 * opposite state, so that the transition will never happen.
312 	 * In both cases it's better to notify a user, that there is
313 	 * nothing to wait for.
314 	 */
315 	if (!applied)
316 		cgroup_file_notify(&cgrp->events_file);
317 }
318