xref: /openbmc/linux/kernel/sched/core_sched.c (revision 0b9d46fc)
1 // SPDX-License-Identifier: GPL-2.0-only
2 
3 /*
4  * A simple wrapper around refcount. An allocated sched_core_cookie's
5  * address is used to compute the cookie of the task.
6  */
7 struct sched_core_cookie {
8 	refcount_t refcnt;
9 };
10 
sched_core_alloc_cookie(void)11 static unsigned long sched_core_alloc_cookie(void)
12 {
13 	struct sched_core_cookie *ck = kmalloc(sizeof(*ck), GFP_KERNEL);
14 	if (!ck)
15 		return 0;
16 
17 	refcount_set(&ck->refcnt, 1);
18 	sched_core_get();
19 
20 	return (unsigned long)ck;
21 }
22 
sched_core_put_cookie(unsigned long cookie)23 static void sched_core_put_cookie(unsigned long cookie)
24 {
25 	struct sched_core_cookie *ptr = (void *)cookie;
26 
27 	if (ptr && refcount_dec_and_test(&ptr->refcnt)) {
28 		kfree(ptr);
29 		sched_core_put();
30 	}
31 }
32 
sched_core_get_cookie(unsigned long cookie)33 static unsigned long sched_core_get_cookie(unsigned long cookie)
34 {
35 	struct sched_core_cookie *ptr = (void *)cookie;
36 
37 	if (ptr)
38 		refcount_inc(&ptr->refcnt);
39 
40 	return cookie;
41 }
42 
43 /*
44  * sched_core_update_cookie - replace the cookie on a task
45  * @p: the task to update
46  * @cookie: the new cookie
47  *
48  * Effectively exchange the task cookie; caller is responsible for lifetimes on
49  * both ends.
50  *
51  * Returns: the old cookie
52  */
sched_core_update_cookie(struct task_struct * p,unsigned long cookie)53 static unsigned long sched_core_update_cookie(struct task_struct *p,
54 					      unsigned long cookie)
55 {
56 	unsigned long old_cookie;
57 	struct rq_flags rf;
58 	struct rq *rq;
59 
60 	rq = task_rq_lock(p, &rf);
61 
62 	/*
63 	 * Since creating a cookie implies sched_core_get(), and we cannot set
64 	 * a cookie until after we've created it, similarly, we cannot destroy
65 	 * a cookie until after we've removed it, we must have core scheduling
66 	 * enabled here.
67 	 */
68 	SCHED_WARN_ON((p->core_cookie || cookie) && !sched_core_enabled(rq));
69 
70 	if (sched_core_enqueued(p))
71 		sched_core_dequeue(rq, p, DEQUEUE_SAVE);
72 
73 	old_cookie = p->core_cookie;
74 	p->core_cookie = cookie;
75 
76 	/*
77 	 * Consider the cases: !prev_cookie and !cookie.
78 	 */
79 	if (cookie && task_on_rq_queued(p))
80 		sched_core_enqueue(rq, p);
81 
82 	/*
83 	 * If task is currently running, it may not be compatible anymore after
84 	 * the cookie change, so enter the scheduler on its CPU to schedule it
85 	 * away.
86 	 *
87 	 * Note that it is possible that as a result of this cookie change, the
88 	 * core has now entered/left forced idle state. Defer accounting to the
89 	 * next scheduling edge, rather than always forcing a reschedule here.
90 	 */
91 	if (task_on_cpu(rq, p))
92 		resched_curr(rq);
93 
94 	task_rq_unlock(rq, p, &rf);
95 
96 	return old_cookie;
97 }
98 
sched_core_clone_cookie(struct task_struct * p)99 static unsigned long sched_core_clone_cookie(struct task_struct *p)
100 {
101 	unsigned long cookie, flags;
102 
103 	raw_spin_lock_irqsave(&p->pi_lock, flags);
104 	cookie = sched_core_get_cookie(p->core_cookie);
105 	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
106 
107 	return cookie;
108 }
109 
sched_core_fork(struct task_struct * p)110 void sched_core_fork(struct task_struct *p)
111 {
112 	RB_CLEAR_NODE(&p->core_node);
113 	p->core_cookie = sched_core_clone_cookie(current);
114 }
115 
sched_core_free(struct task_struct * p)116 void sched_core_free(struct task_struct *p)
117 {
118 	sched_core_put_cookie(p->core_cookie);
119 }
120 
/*
 * __sched_core_set - install a cookie on a task, handling reference counts
 * @p: the task to update
 * @cookie: the cookie to install; the caller keeps its own reference
 *
 * Takes a new reference on @cookie on behalf of @p, swaps it in, and drops
 * the reference @p held on whatever cookie it carried before.
 */
static void __sched_core_set(struct task_struct *p, unsigned long cookie)
{
	unsigned long installed = sched_core_get_cookie(cookie);
	unsigned long replaced = sched_core_update_cookie(p, installed);

	sched_core_put_cookie(replaced);
}
127 
128 /* Called from prctl interface: PR_SCHED_CORE */
sched_core_share_pid(unsigned int cmd,pid_t pid,enum pid_type type,unsigned long uaddr)129 int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type,
130 			 unsigned long uaddr)
131 {
132 	unsigned long cookie = 0, id = 0;
133 	struct task_struct *task, *p;
134 	struct pid *grp;
135 	int err = 0;
136 
137 	if (!static_branch_likely(&sched_smt_present))
138 		return -ENODEV;
139 
140 	BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_THREAD != PIDTYPE_PID);
141 	BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_THREAD_GROUP != PIDTYPE_TGID);
142 	BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_PROCESS_GROUP != PIDTYPE_PGID);
143 
144 	if (type > PIDTYPE_PGID || cmd >= PR_SCHED_CORE_MAX || pid < 0 ||
145 	    (cmd != PR_SCHED_CORE_GET && uaddr))
146 		return -EINVAL;
147 
148 	rcu_read_lock();
149 	if (pid == 0) {
150 		task = current;
151 	} else {
152 		task = find_task_by_vpid(pid);
153 		if (!task) {
154 			rcu_read_unlock();
155 			return -ESRCH;
156 		}
157 	}
158 	get_task_struct(task);
159 	rcu_read_unlock();
160 
161 	/*
162 	 * Check if this process has the right to modify the specified
163 	 * process. Use the regular "ptrace_may_access()" checks.
164 	 */
165 	if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) {
166 		err = -EPERM;
167 		goto out;
168 	}
169 
170 	switch (cmd) {
171 	case PR_SCHED_CORE_GET:
172 		if (type != PIDTYPE_PID || uaddr & 7) {
173 			err = -EINVAL;
174 			goto out;
175 		}
176 		cookie = sched_core_clone_cookie(task);
177 		if (cookie) {
178 			/* XXX improve ? */
179 			ptr_to_hashval((void *)cookie, &id);
180 		}
181 		err = put_user(id, (u64 __user *)uaddr);
182 		goto out;
183 
184 	case PR_SCHED_CORE_CREATE:
185 		cookie = sched_core_alloc_cookie();
186 		if (!cookie) {
187 			err = -ENOMEM;
188 			goto out;
189 		}
190 		break;
191 
192 	case PR_SCHED_CORE_SHARE_TO:
193 		cookie = sched_core_clone_cookie(current);
194 		break;
195 
196 	case PR_SCHED_CORE_SHARE_FROM:
197 		if (type != PIDTYPE_PID) {
198 			err = -EINVAL;
199 			goto out;
200 		}
201 		cookie = sched_core_clone_cookie(task);
202 		__sched_core_set(current, cookie);
203 		goto out;
204 
205 	default:
206 		err = -EINVAL;
207 		goto out;
208 	}
209 
210 	if (type == PIDTYPE_PID) {
211 		__sched_core_set(task, cookie);
212 		goto out;
213 	}
214 
215 	read_lock(&tasklist_lock);
216 	grp = task_pid_type(task, type);
217 
218 	do_each_pid_thread(grp, type, p) {
219 		if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) {
220 			err = -EPERM;
221 			goto out_tasklist;
222 		}
223 	} while_each_pid_thread(grp, type, p);
224 
225 	do_each_pid_thread(grp, type, p) {
226 		__sched_core_set(p, cookie);
227 	} while_each_pid_thread(grp, type, p);
228 out_tasklist:
229 	read_unlock(&tasklist_lock);
230 
231 out:
232 	sched_core_put_cookie(cookie);
233 	put_task_struct(task);
234 	return err;
235 }
236 
237 #ifdef CONFIG_SCHEDSTATS
238 
239 /* REQUIRES: rq->core's clock recently updated. */
__sched_core_account_forceidle(struct rq * rq)240 void __sched_core_account_forceidle(struct rq *rq)
241 {
242 	const struct cpumask *smt_mask = cpu_smt_mask(cpu_of(rq));
243 	u64 delta, now = rq_clock(rq->core);
244 	struct rq *rq_i;
245 	struct task_struct *p;
246 	int i;
247 
248 	lockdep_assert_rq_held(rq);
249 
250 	WARN_ON_ONCE(!rq->core->core_forceidle_count);
251 
252 	if (rq->core->core_forceidle_start == 0)
253 		return;
254 
255 	delta = now - rq->core->core_forceidle_start;
256 	if (unlikely((s64)delta <= 0))
257 		return;
258 
259 	rq->core->core_forceidle_start = now;
260 
261 	if (WARN_ON_ONCE(!rq->core->core_forceidle_occupation)) {
262 		/* can't be forced idle without a running task */
263 	} else if (rq->core->core_forceidle_count > 1 ||
264 		   rq->core->core_forceidle_occupation > 1) {
265 		/*
266 		 * For larger SMT configurations, we need to scale the charged
267 		 * forced idle amount since there can be more than one forced
268 		 * idle sibling and more than one running cookied task.
269 		 */
270 		delta *= rq->core->core_forceidle_count;
271 		delta = div_u64(delta, rq->core->core_forceidle_occupation);
272 	}
273 
274 	for_each_cpu(i, smt_mask) {
275 		rq_i = cpu_rq(i);
276 		p = rq_i->core_pick ?: rq_i->curr;
277 
278 		if (p == rq_i->idle)
279 			continue;
280 
281 		/*
282 		 * Note: this will account forceidle to the current cpu, even
283 		 * if it comes from our SMT sibling.
284 		 */
285 		__account_forceidle_time(p, delta);
286 	}
287 }
288 
__sched_core_tick(struct rq * rq)289 void __sched_core_tick(struct rq *rq)
290 {
291 	if (!rq->core->core_forceidle_count)
292 		return;
293 
294 	if (rq != rq->core)
295 		update_rq_clock(rq->core);
296 
297 	__sched_core_account_forceidle(rq);
298 }
299 
300 #endif /* CONFIG_SCHEDSTATS */
301