xref: /openbmc/linux/kernel/sched/core_sched.c (revision aa74c44b)
1 // SPDX-License-Identifier: GPL-2.0-only
2 
3 #include <linux/prctl.h>
4 #include "sched.h"
5 
/*
 * A simple wrapper around refcount. An allocated sched_core_cookie's
 * address is used to compute the cookie of the task.
 *
 * Throughout this file a cookie is passed around as the object's address
 * cast to unsigned long; the value 0 (a NULL pointer) means "no cookie".
 */
struct sched_core_cookie {
	/* Number of references held; the object is freed when it drops to 0. */
	refcount_t refcnt;
};
13 
14 static unsigned long sched_core_alloc_cookie(void)
15 {
16 	struct sched_core_cookie *ck = kmalloc(sizeof(*ck), GFP_KERNEL);
17 	if (!ck)
18 		return 0;
19 
20 	refcount_set(&ck->refcnt, 1);
21 	sched_core_get();
22 
23 	return (unsigned long)ck;
24 }
25 
26 static void sched_core_put_cookie(unsigned long cookie)
27 {
28 	struct sched_core_cookie *ptr = (void *)cookie;
29 
30 	if (ptr && refcount_dec_and_test(&ptr->refcnt)) {
31 		kfree(ptr);
32 		sched_core_put();
33 	}
34 }
35 
36 static unsigned long sched_core_get_cookie(unsigned long cookie)
37 {
38 	struct sched_core_cookie *ptr = (void *)cookie;
39 
40 	if (ptr)
41 		refcount_inc(&ptr->refcnt);
42 
43 	return cookie;
44 }
45 
46 /*
47  * sched_core_update_cookie - replace the cookie on a task
48  * @p: the task to update
49  * @cookie: the new cookie
50  *
51  * Effectively exchange the task cookie; caller is responsible for lifetimes on
52  * both ends.
53  *
54  * Returns: the old cookie
55  */
56 static unsigned long sched_core_update_cookie(struct task_struct *p,
57 					      unsigned long cookie)
58 {
59 	unsigned long old_cookie;
60 	struct rq_flags rf;
61 	struct rq *rq;
62 	bool enqueued;
63 
64 	rq = task_rq_lock(p, &rf);
65 
66 	/*
67 	 * Since creating a cookie implies sched_core_get(), and we cannot set
68 	 * a cookie until after we've created it, similarly, we cannot destroy
69 	 * a cookie until after we've removed it, we must have core scheduling
70 	 * enabled here.
71 	 */
72 	SCHED_WARN_ON((p->core_cookie || cookie) && !sched_core_enabled(rq));
73 
74 	enqueued = sched_core_enqueued(p);
75 	if (enqueued)
76 		sched_core_dequeue(rq, p, DEQUEUE_SAVE);
77 
78 	old_cookie = p->core_cookie;
79 	p->core_cookie = cookie;
80 
81 	if (enqueued)
82 		sched_core_enqueue(rq, p);
83 
84 	/*
85 	 * If task is currently running, it may not be compatible anymore after
86 	 * the cookie change, so enter the scheduler on its CPU to schedule it
87 	 * away.
88 	 *
89 	 * Note that it is possible that as a result of this cookie change, the
90 	 * core has now entered/left forced idle state. Defer accounting to the
91 	 * next scheduling edge, rather than always forcing a reschedule here.
92 	 */
93 	if (task_running(rq, p))
94 		resched_curr(rq);
95 
96 	task_rq_unlock(rq, p, &rf);
97 
98 	return old_cookie;
99 }
100 
101 static unsigned long sched_core_clone_cookie(struct task_struct *p)
102 {
103 	unsigned long cookie, flags;
104 
105 	raw_spin_lock_irqsave(&p->pi_lock, flags);
106 	cookie = sched_core_get_cookie(p->core_cookie);
107 	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
108 
109 	return cookie;
110 }
111 
112 void sched_core_fork(struct task_struct *p)
113 {
114 	RB_CLEAR_NODE(&p->core_node);
115 	p->core_cookie = sched_core_clone_cookie(current);
116 }
117 
118 void sched_core_free(struct task_struct *p)
119 {
120 	sched_core_put_cookie(p->core_cookie);
121 }
122 
/*
 * Attach @cookie to @p.
 *
 * A new reference is taken on behalf of the task, so the caller keeps its own
 * reference on @cookie. The reference the task held on its previous cookie is
 * dropped.
 */
static void __sched_core_set(struct task_struct *p, unsigned long cookie)
{
	unsigned long installed, displaced;

	installed = sched_core_get_cookie(cookie);
	displaced = sched_core_update_cookie(p, installed);
	sched_core_put_cookie(displaced);
}
129 
130 /* Called from prctl interface: PR_SCHED_CORE */
131 int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type,
132 			 unsigned long uaddr)
133 {
134 	unsigned long cookie = 0, id = 0;
135 	struct task_struct *task, *p;
136 	struct pid *grp;
137 	int err = 0;
138 
139 	if (!static_branch_likely(&sched_smt_present))
140 		return -ENODEV;
141 
142 	BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_THREAD != PIDTYPE_PID);
143 	BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_THREAD_GROUP != PIDTYPE_TGID);
144 	BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_PROCESS_GROUP != PIDTYPE_PGID);
145 
146 	if (type > PIDTYPE_PGID || cmd >= PR_SCHED_CORE_MAX || pid < 0 ||
147 	    (cmd != PR_SCHED_CORE_GET && uaddr))
148 		return -EINVAL;
149 
150 	rcu_read_lock();
151 	if (pid == 0) {
152 		task = current;
153 	} else {
154 		task = find_task_by_vpid(pid);
155 		if (!task) {
156 			rcu_read_unlock();
157 			return -ESRCH;
158 		}
159 	}
160 	get_task_struct(task);
161 	rcu_read_unlock();
162 
163 	/*
164 	 * Check if this process has the right to modify the specified
165 	 * process. Use the regular "ptrace_may_access()" checks.
166 	 */
167 	if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) {
168 		err = -EPERM;
169 		goto out;
170 	}
171 
172 	switch (cmd) {
173 	case PR_SCHED_CORE_GET:
174 		if (type != PIDTYPE_PID || uaddr & 7) {
175 			err = -EINVAL;
176 			goto out;
177 		}
178 		cookie = sched_core_clone_cookie(task);
179 		if (cookie) {
180 			/* XXX improve ? */
181 			ptr_to_hashval((void *)cookie, &id);
182 		}
183 		err = put_user(id, (u64 __user *)uaddr);
184 		goto out;
185 
186 	case PR_SCHED_CORE_CREATE:
187 		cookie = sched_core_alloc_cookie();
188 		if (!cookie) {
189 			err = -ENOMEM;
190 			goto out;
191 		}
192 		break;
193 
194 	case PR_SCHED_CORE_SHARE_TO:
195 		cookie = sched_core_clone_cookie(current);
196 		break;
197 
198 	case PR_SCHED_CORE_SHARE_FROM:
199 		if (type != PIDTYPE_PID) {
200 			err = -EINVAL;
201 			goto out;
202 		}
203 		cookie = sched_core_clone_cookie(task);
204 		__sched_core_set(current, cookie);
205 		goto out;
206 
207 	default:
208 		err = -EINVAL;
209 		goto out;
210 	};
211 
212 	if (type == PIDTYPE_PID) {
213 		__sched_core_set(task, cookie);
214 		goto out;
215 	}
216 
217 	read_lock(&tasklist_lock);
218 	grp = task_pid_type(task, type);
219 
220 	do_each_pid_thread(grp, type, p) {
221 		if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) {
222 			err = -EPERM;
223 			goto out_tasklist;
224 		}
225 	} while_each_pid_thread(grp, type, p);
226 
227 	do_each_pid_thread(grp, type, p) {
228 		__sched_core_set(p, cookie);
229 	} while_each_pid_thread(grp, type, p);
230 out_tasklist:
231 	read_unlock(&tasklist_lock);
232 
233 out:
234 	sched_core_put_cookie(cookie);
235 	put_task_struct(task);
236 	return err;
237 }
238 
239 #ifdef CONFIG_SCHEDSTATS
240 
241 /* REQUIRES: rq->core's clock recently updated. */
242 void __sched_core_account_forceidle(struct rq *rq)
243 {
244 	const struct cpumask *smt_mask = cpu_smt_mask(cpu_of(rq));
245 	u64 delta, now = rq_clock(rq->core);
246 	struct rq *rq_i;
247 	struct task_struct *p;
248 	int i;
249 
250 	lockdep_assert_rq_held(rq);
251 
252 	WARN_ON_ONCE(!rq->core->core_forceidle_count);
253 
254 	if (rq->core->core_forceidle_start == 0)
255 		return;
256 
257 	delta = now - rq->core->core_forceidle_start;
258 	if (unlikely((s64)delta <= 0))
259 		return;
260 
261 	rq->core->core_forceidle_start = now;
262 
263 	if (WARN_ON_ONCE(!rq->core->core_forceidle_occupation)) {
264 		/* can't be forced idle without a running task */
265 	} else if (rq->core->core_forceidle_count > 1 ||
266 		   rq->core->core_forceidle_occupation > 1) {
267 		/*
268 		 * For larger SMT configurations, we need to scale the charged
269 		 * forced idle amount since there can be more than one forced
270 		 * idle sibling and more than one running cookied task.
271 		 */
272 		delta *= rq->core->core_forceidle_count;
273 		delta = div_u64(delta, rq->core->core_forceidle_occupation);
274 	}
275 
276 	for_each_cpu(i, smt_mask) {
277 		rq_i = cpu_rq(i);
278 		p = rq_i->core_pick ?: rq_i->curr;
279 
280 		if (!p->core_cookie)
281 			continue;
282 
283 		__schedstat_add(p->stats.core_forceidle_sum, delta);
284 	}
285 }
286 
287 void __sched_core_tick(struct rq *rq)
288 {
289 	if (!rq->core->core_forceidle_count)
290 		return;
291 
292 	if (rq != rq->core)
293 		update_rq_clock(rq->core);
294 
295 	__sched_core_account_forceidle(rq);
296 }
297 
298 #endif /* CONFIG_SCHEDSTATS */
299