// SPDX-License-Identifier: GPL-2.0-only
#include <linux/prctl.h>
#include "sched.h"

/*
 * A simple wrapper around refcount. An allocated sched_core_cookie's
 * address is used to compute the cookie of the task.
 */
struct sched_core_cookie {
	refcount_t refcnt;
};

static unsigned long sched_core_alloc_cookie(void)
{
	struct sched_core_cookie *ck = kmalloc(sizeof(*ck), GFP_KERNEL);

	if (!ck)
		return 0;

	refcount_set(&ck->refcnt, 1);
	sched_core_get();

	return (unsigned long)ck;
}

static void sched_core_put_cookie(unsigned long cookie)
{
	struct sched_core_cookie *ptr = (void *)cookie;

	if (ptr && refcount_dec_and_test(&ptr->refcnt)) {
		kfree(ptr);
		sched_core_put();
	}
}

static unsigned long sched_core_get_cookie(unsigned long cookie)
{
	struct sched_core_cookie *ptr = (void *)cookie;

	if (ptr)
		refcount_inc(&ptr->refcnt);

	return cookie;
}

/*
 * sched_core_update_cookie - replace the cookie on a task
 * @p: the task to update
 * @cookie: the new cookie
 *
 * Effectively exchange the task cookie; caller is responsible for lifetimes on
 * both ends.
 *
 * Returns: the old cookie
 */
static unsigned long sched_core_update_cookie(struct task_struct *p,
					      unsigned long cookie)
{
	unsigned long old_cookie;
	struct rq_flags rf;
	struct rq *rq;
	bool enqueued;

	rq = task_rq_lock(p, &rf);

	/*
	 * Since creating a cookie implies sched_core_get(), and since we
	 * cannot set a cookie until after we've created it, nor destroy a
	 * cookie until after we've removed it, core scheduling must already
	 * be enabled here.
	 */
	SCHED_WARN_ON((p->core_cookie || cookie) && !sched_core_enabled(rq));

	enqueued = sched_core_enqueued(p);
	if (enqueued)
		sched_core_dequeue(rq, p, DEQUEUE_SAVE);

	old_cookie = p->core_cookie;
	p->core_cookie = cookie;

	if (enqueued)
		sched_core_enqueue(rq, p);

	/*
	 * If the task is currently running, it may no longer be compatible
	 * after the cookie change, so enter the scheduler on its CPU to
	 * schedule it away.
	 *
	 * Note that it is possible that as a result of this cookie change, the
	 * core has now entered/left forced idle state. Defer accounting to the
	 * next scheduling edge, rather than always forcing a reschedule here.
	 */
	if (task_running(rq, p))
		resched_curr(rq);

	task_rq_unlock(rq, p, &rf);

	return old_cookie;
}
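/*
 * Illustrative sketch (assumed usage, not called anywhere in this file): a
 * caller that wants to tag a task with an existing cookie is expected to pair
 * the helpers above the way __sched_core_set() further below does, so both
 * ends of the exchange keep their refcounts balanced:
 *
 *	new_ck = sched_core_get_cookie(cookie);        // +1 ref, held via p->core_cookie
 *	old_ck = sched_core_update_cookie(p, new_ck);  // swap under the rq lock
 *	sched_core_put_cookie(old_ck);                 // drop the displaced cookie's ref
 *
 * Followed consistently, a cookie's refcount equals the number of tasks
 * currently tagged with it, plus any transient references (e.g. the local
 * reference an in-flight prctl() holds while assigning it).
 */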
static unsigned long sched_core_clone_cookie(struct task_struct *p)
{
	unsigned long cookie, flags;

	raw_spin_lock_irqsave(&p->pi_lock, flags);
	cookie = sched_core_get_cookie(p->core_cookie);
	raw_spin_unlock_irqrestore(&p->pi_lock, flags);

	return cookie;
}

void sched_core_fork(struct task_struct *p)
{
	RB_CLEAR_NODE(&p->core_node);
	p->core_cookie = sched_core_clone_cookie(current);
}

void sched_core_free(struct task_struct *p)
{
	sched_core_put_cookie(p->core_cookie);
}

static void __sched_core_set(struct task_struct *p, unsigned long cookie)
{
	cookie = sched_core_get_cookie(cookie);
	cookie = sched_core_update_cookie(p, cookie);
	sched_core_put_cookie(cookie);
}

/* Called from prctl interface: PR_SCHED_CORE */
int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type,
			 unsigned long uaddr)
{
	unsigned long cookie = 0, id = 0;
	struct task_struct *task, *p;
	struct pid *grp;
	int err = 0;

	if (!static_branch_likely(&sched_smt_present))
		return -ENODEV;

	BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_THREAD != PIDTYPE_PID);
	BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_THREAD_GROUP != PIDTYPE_TGID);
	BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_PROCESS_GROUP != PIDTYPE_PGID);

	if (type > PIDTYPE_PGID || cmd >= PR_SCHED_CORE_MAX || pid < 0 ||
	    (cmd != PR_SCHED_CORE_GET && uaddr))
		return -EINVAL;

	rcu_read_lock();
	if (pid == 0) {
		task = current;
	} else {
		task = find_task_by_vpid(pid);
		if (!task) {
			rcu_read_unlock();
			return -ESRCH;
		}
	}
	get_task_struct(task);
	rcu_read_unlock();

	/*
	 * Check if this process has the right to modify the specified
	 * process. Use the regular "ptrace_may_access()" checks.
	 */
	if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) {
		err = -EPERM;
		goto out;
	}

	switch (cmd) {
	case PR_SCHED_CORE_GET:
		if (type != PIDTYPE_PID || uaddr & 7) {
			err = -EINVAL;
			goto out;
		}
		cookie = sched_core_clone_cookie(task);
		if (cookie) {
			/* XXX improve ? */
			ptr_to_hashval((void *)cookie, &id);
		}
		err = put_user(id, (u64 __user *)uaddr);
		goto out;

	case PR_SCHED_CORE_CREATE:
		cookie = sched_core_alloc_cookie();
		if (!cookie) {
			err = -ENOMEM;
			goto out;
		}
		break;

	case PR_SCHED_CORE_SHARE_TO:
		cookie = sched_core_clone_cookie(current);
		break;

	case PR_SCHED_CORE_SHARE_FROM:
		if (type != PIDTYPE_PID) {
			err = -EINVAL;
			goto out;
		}
		cookie = sched_core_clone_cookie(task);
		__sched_core_set(current, cookie);
		goto out;

	default:
		err = -EINVAL;
		goto out;
	}

	if (type == PIDTYPE_PID) {
		__sched_core_set(task, cookie);
		goto out;
	}

	read_lock(&tasklist_lock);
	grp = task_pid_type(task, type);

	do_each_pid_thread(grp, type, p) {
		if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) {
			err = -EPERM;
			goto out_tasklist;
		}
	} while_each_pid_thread(grp, type, p);

	do_each_pid_thread(grp, type, p) {
		__sched_core_set(p, cookie);
	} while_each_pid_thread(grp, type, p);

out_tasklist:
	read_unlock(&tasklist_lock);
out:
	sched_core_put_cookie(cookie);
	put_task_struct(task);
	return err;
}
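/*
 * Userspace usage sketch (illustrative only, not part of this file): the
 * interface above is driven through prctl(2) as
 * prctl(PR_SCHED_CORE, cmd, pid, scope, uaddr), with the constants coming
 * from <linux/prctl.h>; "some_pid" below is a placeholder:
 *
 *	// Tag the calling process's whole thread group with a fresh cookie.
 *	prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, 0,
 *	      PR_SCHED_CORE_SCOPE_THREAD_GROUP, 0);
 *
 *	// Push the caller's cookie onto another process's thread group.
 *	prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_TO, some_pid,
 *	      PR_SCHED_CORE_SCOPE_THREAD_GROUP, 0);
 *
 *	// Read back the (hashed) cookie id of the current thread; the scope
 *	// must be a single thread and uaddr must be u64-aligned, matching
 *	// the "uaddr & 7" check above.
 *	unsigned long long id;
 *	prctl(PR_SCHED_CORE, PR_SCHED_CORE_GET, 0,
 *	      PR_SCHED_CORE_SCOPE_THREAD, (unsigned long)&id);
 */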
#ifdef CONFIG_SCHEDSTATS

/* REQUIRES: rq->core's clock recently updated. */
void __sched_core_account_forceidle(struct rq *rq)
{
	const struct cpumask *smt_mask = cpu_smt_mask(cpu_of(rq));
	u64 delta, now = rq_clock(rq->core);
	struct rq *rq_i;
	struct task_struct *p;
	int i;

	lockdep_assert_rq_held(rq);

	WARN_ON_ONCE(!rq->core->core_forceidle_count);

	if (rq->core->core_forceidle_start == 0)
		return;

	delta = now - rq->core->core_forceidle_start;
	if (unlikely((s64)delta <= 0))
		return;

	rq->core->core_forceidle_start = now;

	if (WARN_ON_ONCE(!rq->core->core_forceidle_occupation)) {
		/* can't be forced idle without a running task */
	} else if (rq->core->core_forceidle_count > 1 ||
		   rq->core->core_forceidle_occupation > 1) {
		/*
		 * For larger SMT configurations, we need to scale the charged
		 * forced idle amount since there can be more than one forced
		 * idle sibling and more than one running cookied task.
		 */
		delta *= rq->core->core_forceidle_count;
		delta = div_u64(delta, rq->core->core_forceidle_occupation);
	}

	for_each_cpu(i, smt_mask) {
		rq_i = cpu_rq(i);
		p = rq_i->core_pick ?: rq_i->curr;

		if (p == rq_i->idle)
			continue;

		__schedstat_add(p->stats.core_forceidle_sum, delta);
	}
}

void __sched_core_tick(struct rq *rq)
{
	if (!rq->core->core_forceidle_count)
		return;

	if (rq != rq->core)
		update_rq_clock(rq->core);

	__sched_core_account_forceidle(rq);
}

#endif /* CONFIG_SCHEDSTATS */
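/*
 * Worked example for the scaling in __sched_core_account_forceidle() above
 * (numbers purely illustrative): on a hypothetical SMT-4 core where one
 * cookied task runs while the other three siblings sit forced idle for 3ms,
 * core_forceidle_count == 3 and core_forceidle_occupation == 1, so the charged
 * delta becomes 3ms * 3 / 1 = 9ms and the single running task absorbs all of
 * the forced-idle CPU time.  With two running cookied tasks and two forced
 * idle siblings (count == 2, occupation == 2) the factor is 2/2, each task is
 * charged 3ms, and the total again matches the 6ms of CPU time lost to forced
 * idle.  On plain SMT-2, at most one sibling can be forced idle while at most
 * one task runs, so delta is charged unscaled.
 */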