xref: /openbmc/linux/kernel/cgroup/freezer.c (revision e87c65ae)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/cgroup.h>
3 #include <linux/sched.h>
4 #include <linux/sched/task.h>
5 #include <linux/sched/signal.h>
6 
7 #include "cgroup-internal.h"
8 
9 #include <trace/events/cgroup.h>
10 
11 /*
12  * Propagate the cgroup frozen state upwards by the cgroup tree.
13  */
14 static void cgroup_propagate_frozen(struct cgroup *cgrp, bool frozen)
15 {
16 	int desc = 1;
17 
18 	/*
19 	 * If the new state is frozen, some freezing ancestor cgroups may change
20 	 * their state too, depending on if all their descendants are frozen.
21 	 *
22 	 * Otherwise, all ancestor cgroups are forced into the non-frozen state.
23 	 */
24 	while ((cgrp = cgroup_parent(cgrp))) {
25 		if (frozen) {
26 			cgrp->freezer.nr_frozen_descendants += desc;
27 			if (!test_bit(CGRP_FROZEN, &cgrp->flags) &&
28 			    test_bit(CGRP_FREEZE, &cgrp->flags) &&
29 			    cgrp->freezer.nr_frozen_descendants ==
30 			    cgrp->nr_descendants) {
31 				set_bit(CGRP_FROZEN, &cgrp->flags);
32 				cgroup_file_notify(&cgrp->events_file);
33 				TRACE_CGROUP_PATH(notify_frozen, cgrp, 1);
34 				desc++;
35 			}
36 		} else {
37 			cgrp->freezer.nr_frozen_descendants -= desc;
38 			if (test_bit(CGRP_FROZEN, &cgrp->flags)) {
39 				clear_bit(CGRP_FROZEN, &cgrp->flags);
40 				cgroup_file_notify(&cgrp->events_file);
41 				TRACE_CGROUP_PATH(notify_frozen, cgrp, 0);
42 				desc++;
43 			}
44 		}
45 	}
46 }
47 
48 /*
49  * Revisit the cgroup frozen state.
50  * Checks if the cgroup is really frozen and perform all state transitions.
51  */
52 void cgroup_update_frozen(struct cgroup *cgrp)
53 {
54 	bool frozen;
55 
56 	lockdep_assert_held(&css_set_lock);
57 
58 	/*
59 	 * If the cgroup has to be frozen (CGRP_FREEZE bit set),
60 	 * and all tasks are frozen and/or stopped, let's consider
61 	 * the cgroup frozen. Otherwise it's not frozen.
62 	 */
63 	frozen = test_bit(CGRP_FREEZE, &cgrp->flags) &&
64 		cgrp->freezer.nr_frozen_tasks == __cgroup_task_count(cgrp);
65 
66 	if (frozen) {
67 		/* Already there? */
68 		if (test_bit(CGRP_FROZEN, &cgrp->flags))
69 			return;
70 
71 		set_bit(CGRP_FROZEN, &cgrp->flags);
72 	} else {
73 		/* Already there? */
74 		if (!test_bit(CGRP_FROZEN, &cgrp->flags))
75 			return;
76 
77 		clear_bit(CGRP_FROZEN, &cgrp->flags);
78 	}
79 	cgroup_file_notify(&cgrp->events_file);
80 	TRACE_CGROUP_PATH(notify_frozen, cgrp, frozen);
81 
82 	/* Update the state of ancestor cgroups. */
83 	cgroup_propagate_frozen(cgrp, frozen);
84 }
85 
86 /*
87  * Increment cgroup's nr_frozen_tasks.
88  */
89 static void cgroup_inc_frozen_cnt(struct cgroup *cgrp)
90 {
91 	cgrp->freezer.nr_frozen_tasks++;
92 }
93 
94 /*
95  * Decrement cgroup's nr_frozen_tasks.
96  */
97 static void cgroup_dec_frozen_cnt(struct cgroup *cgrp)
98 {
99 	cgrp->freezer.nr_frozen_tasks--;
100 	WARN_ON_ONCE(cgrp->freezer.nr_frozen_tasks < 0);
101 }
102 
103 /*
104  * Enter frozen/stopped state, if not yet there. Update cgroup's counters,
105  * and revisit the state of the cgroup, if necessary.
106  */
107 void cgroup_enter_frozen(void)
108 {
109 	struct cgroup *cgrp;
110 
111 	if (current->frozen)
112 		return;
113 
114 	spin_lock_irq(&css_set_lock);
115 	current->frozen = true;
116 	cgrp = task_dfl_cgroup(current);
117 	cgroup_inc_frozen_cnt(cgrp);
118 	cgroup_update_frozen(cgrp);
119 	spin_unlock_irq(&css_set_lock);
120 }
121 
122 /*
123  * Conditionally leave frozen/stopped state. Update cgroup's counters,
124  * and revisit the state of the cgroup, if necessary.
125  *
126  * If always_leave is not set, and the cgroup is freezing,
127  * we're racing with the cgroup freezing. In this case, we don't
128  * drop the frozen counter to avoid a transient switch to
129  * the unfrozen state.
130  */
131 void cgroup_leave_frozen(bool always_leave)
132 {
133 	struct cgroup *cgrp;
134 
135 	spin_lock_irq(&css_set_lock);
136 	cgrp = task_dfl_cgroup(current);
137 	if (always_leave || !test_bit(CGRP_FREEZE, &cgrp->flags)) {
138 		cgroup_dec_frozen_cnt(cgrp);
139 		cgroup_update_frozen(cgrp);
140 		WARN_ON_ONCE(!current->frozen);
141 		current->frozen = false;
142 	} else if (!(current->jobctl & JOBCTL_TRAP_FREEZE)) {
143 		spin_lock(&current->sighand->siglock);
144 		current->jobctl |= JOBCTL_TRAP_FREEZE;
145 		set_thread_flag(TIF_SIGPENDING);
146 		spin_unlock(&current->sighand->siglock);
147 	}
148 	spin_unlock_irq(&css_set_lock);
149 }
150 
151 /*
152  * Freeze or unfreeze the task by setting or clearing the JOBCTL_TRAP_FREEZE
153  * jobctl bit.
154  */
155 static void cgroup_freeze_task(struct task_struct *task, bool freeze)
156 {
157 	unsigned long flags;
158 
159 	/* If the task is about to die, don't bother with freezing it. */
160 	if (!lock_task_sighand(task, &flags))
161 		return;
162 
163 	if (freeze) {
164 		task->jobctl |= JOBCTL_TRAP_FREEZE;
165 		signal_wake_up(task, false);
166 	} else {
167 		task->jobctl &= ~JOBCTL_TRAP_FREEZE;
168 		wake_up_process(task);
169 	}
170 
171 	unlock_task_sighand(task, &flags);
172 }
173 
174 /*
175  * Freeze or unfreeze all tasks in the given cgroup.
176  */
177 static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze)
178 {
179 	struct css_task_iter it;
180 	struct task_struct *task;
181 
182 	lockdep_assert_held(&cgroup_mutex);
183 
184 	spin_lock_irq(&css_set_lock);
185 	if (freeze)
186 		set_bit(CGRP_FREEZE, &cgrp->flags);
187 	else
188 		clear_bit(CGRP_FREEZE, &cgrp->flags);
189 	spin_unlock_irq(&css_set_lock);
190 
191 	if (freeze)
192 		TRACE_CGROUP_PATH(freeze, cgrp);
193 	else
194 		TRACE_CGROUP_PATH(unfreeze, cgrp);
195 
196 	css_task_iter_start(&cgrp->self, 0, &it);
197 	while ((task = css_task_iter_next(&it))) {
198 		/*
199 		 * Ignore kernel threads here. Freezing cgroups containing
200 		 * kthreads isn't supported.
201 		 */
202 		if (task->flags & PF_KTHREAD)
203 			continue;
204 		cgroup_freeze_task(task, freeze);
205 	}
206 	css_task_iter_end(&it);
207 
208 	/*
209 	 * Cgroup state should be revisited here to cover empty leaf cgroups
210 	 * and cgroups which descendants are already in the desired state.
211 	 */
212 	spin_lock_irq(&css_set_lock);
213 	if (cgrp->nr_descendants == cgrp->freezer.nr_frozen_descendants)
214 		cgroup_update_frozen(cgrp);
215 	spin_unlock_irq(&css_set_lock);
216 }
217 
218 /*
219  * Adjust the task state (freeze or unfreeze) and revisit the state of
220  * source and destination cgroups.
221  */
222 void cgroup_freezer_migrate_task(struct task_struct *task,
223 				 struct cgroup *src, struct cgroup *dst)
224 {
225 	lockdep_assert_held(&css_set_lock);
226 
227 	/*
228 	 * Kernel threads are not supposed to be frozen at all.
229 	 */
230 	if (task->flags & PF_KTHREAD)
231 		return;
232 
233 	/*
234 	 * It's not necessary to do changes if both of the src and dst cgroups
235 	 * are not freezing and task is not frozen.
236 	 */
237 	if (!test_bit(CGRP_FREEZE, &src->flags) &&
238 	    !test_bit(CGRP_FREEZE, &dst->flags) &&
239 	    !task->frozen)
240 		return;
241 
242 	/*
243 	 * Adjust counters of freezing and frozen tasks.
244 	 * Note, that if the task is frozen, but the destination cgroup is not
245 	 * frozen, we bump both counters to keep them balanced.
246 	 */
247 	if (task->frozen) {
248 		cgroup_inc_frozen_cnt(dst);
249 		cgroup_dec_frozen_cnt(src);
250 	}
251 	cgroup_update_frozen(dst);
252 	cgroup_update_frozen(src);
253 
254 	/*
255 	 * Force the task to the desired state.
256 	 */
257 	cgroup_freeze_task(task, test_bit(CGRP_FREEZE, &dst->flags));
258 }
259 
260 void cgroup_freeze(struct cgroup *cgrp, bool freeze)
261 {
262 	struct cgroup_subsys_state *css;
263 	struct cgroup *dsct;
264 	bool applied = false;
265 
266 	lockdep_assert_held(&cgroup_mutex);
267 
268 	/*
269 	 * Nothing changed? Just exit.
270 	 */
271 	if (cgrp->freezer.freeze == freeze)
272 		return;
273 
274 	cgrp->freezer.freeze = freeze;
275 
276 	/*
277 	 * Propagate changes downwards the cgroup tree.
278 	 */
279 	css_for_each_descendant_pre(css, &cgrp->self) {
280 		dsct = css->cgroup;
281 
282 		if (cgroup_is_dead(dsct))
283 			continue;
284 
285 		if (freeze) {
286 			dsct->freezer.e_freeze++;
287 			/*
288 			 * Already frozen because of ancestor's settings?
289 			 */
290 			if (dsct->freezer.e_freeze > 1)
291 				continue;
292 		} else {
293 			dsct->freezer.e_freeze--;
294 			/*
295 			 * Still frozen because of ancestor's settings?
296 			 */
297 			if (dsct->freezer.e_freeze > 0)
298 				continue;
299 
300 			WARN_ON_ONCE(dsct->freezer.e_freeze < 0);
301 		}
302 
303 		/*
304 		 * Do change actual state: freeze or unfreeze.
305 		 */
306 		cgroup_do_freeze(dsct, freeze);
307 		applied = true;
308 	}
309 
310 	/*
311 	 * Even if the actual state hasn't changed, let's notify a user.
312 	 * The state can be enforced by an ancestor cgroup: the cgroup
313 	 * can already be in the desired state or it can be locked in the
314 	 * opposite state, so that the transition will never happen.
315 	 * In both cases it's better to notify a user, that there is
316 	 * nothing to wait for.
317 	 */
318 	if (!applied) {
319 		TRACE_CGROUP_PATH(notify_frozen, cgrp,
320 				  test_bit(CGRP_FROZEN, &cgrp->flags));
321 		cgroup_file_notify(&cgrp->events_file);
322 	}
323 }
324