core.c: db6da59cf27b5661ced03754ae0550f8914eda9e (old) vs. cab3ecaed5cdcc9c36a96874b4c45056a46ece45 (new)
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * kernel/sched/core.c
4 *
5 * Core kernel scheduler code and related syscalls
6 *
7 * Copyright (C) 1991-2002 Linus Torvalds
8 */

--- 2199 unchanged lines hidden ---

2208 /*
2209 * A queue event has occurred, and we're going to schedule. In
2210 * this case, we can save a useless back to back clock update.
2211 */
2212 if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr))
2213 rq_clock_skip_update(rq);
2214}
2215
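/*
 * [annotation, not part of core.c] Everything from new line 2216 through 2362 below
 * exists only in the newer revision: the __task_state_match() and task_state_match()
 * helpers, plus a wait_task_inactive() that also matches on p->saved_state under
 * CONFIG_PREEMPT_RT.
 */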
2216static __always_inline
2217int __task_state_match(struct task_struct *p, unsigned int state)
2218{
2219 if (READ_ONCE(p->__state) & state)
2220 return 1;
2221
2222#ifdef CONFIG_PREEMPT_RT
2223 if (READ_ONCE(p->saved_state) & state)
2224 return -1;
2225#endif
2226 return 0;
2227}
2228
2229static __always_inline
2230int task_state_match(struct task_struct *p, unsigned int state)
2231{
2232#ifdef CONFIG_PREEMPT_RT
2233 int match;
2234
2235 /*
2236 * Serialize against current_save_and_set_rtlock_wait_state() and
2237 * current_restore_rtlock_saved_state().
2238 */
2239 raw_spin_lock_irq(&p->pi_lock);
2240 match = __task_state_match(p, state);
2241 raw_spin_unlock_irq(&p->pi_lock);
2242
2243 return match;
2244#else
2245 return __task_state_match(p, state);
2246#endif
2247}
2248
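/*
 * [annotation, not part of core.c] __task_state_match()/task_state_match() above
 * return a tri-state: 1 when p->__state matches the queried mask, -1 when only
 * p->saved_state matches (PREEMPT_RT: the task is blocked on an rtlock), and 0 when
 * neither does. A minimal user-space sketch of that contract follows; the struct,
 * constants and names are illustrative stand-ins, not the kernel's definitions.
 */
#include <stdio.h>

struct toy_task {
	unsigned int state;		/* models p->__state */
	unsigned int saved_state;	/* models p->saved_state (PREEMPT_RT) */
};

static int toy_task_state_match(const struct toy_task *p, unsigned int mask)
{
	if (p->state & mask)
		return 1;		/* genuinely in a matching state */
	if (p->saved_state & mask)
		return -1;		/* matching state parked behind an rtlock */
	return 0;			/* no match: the task changed state */
}

int main(void)
{
	/* Illustrative values; the real TASK_* flags live in the kernel headers. */
	const unsigned int TOY_UNINTERRUPTIBLE = 0x0002;
	const unsigned int TOY_RTLOCK_WAIT = 0x1000;
	struct toy_task p = {
		.state = TOY_RTLOCK_WAIT,		/* blocked on an rtlock ... */
		.saved_state = TOY_UNINTERRUPTIBLE,	/* ... while "really" blocked */
	};

	/* A waiter asking about TOY_UNINTERRUPTIBLE gets -1: treat the task as
	 * still blocked rather than as having woken up. */
	printf("match = %d\n", toy_task_state_match(&p, TOY_UNINTERRUPTIBLE));
	return 0;
}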
2249/*
2250 * wait_task_inactive - wait for a thread to unschedule.
2251 *
2252 * Wait for the thread to block in any of the states set in @match_state.
2253 * If it changes, i.e. @p might have woken up, then return zero. When we
2254 * succeed in waiting for @p to be off its CPU, we return a positive number
2255 * (its total switch count). If a second call a short while later returns the
2256 * same number, the caller can be sure that @p has remained unscheduled the
2257 * whole time.
2258 *
2259 * The caller must ensure that the task *will* unschedule sometime soon,
2260 * else this function might spin for a *long* time. This function can't
2261 * be called with interrupts off, or it may introduce deadlock with
2262 * smp_call_function() if an IPI is sent by the same process we are
2263 * waiting to become inactive.
2264 */
2265unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state)
2266{
2267 int running, queued, match;
2268 struct rq_flags rf;
2269 unsigned long ncsw;
2270 struct rq *rq;
2271
2272 for (;;) {
2273 /*
2274 * We do the initial early heuristics without holding
2275 * any task-queue locks at all. We'll only try to get
2276 * the runqueue lock when things look like they will
2277 * work out!
2278 */
2279 rq = task_rq(p);
2280
2281 /*
2282 * If the task is actively running on another CPU
2283 * still, just relax and busy-wait without holding
2284 * any locks.
2285 *
2286 * NOTE! Since we don't hold any locks, it's not
2287 * even sure that "rq" stays as the right runqueue!
2288 * But we don't care, since "task_on_cpu()" will
2289 * return false if the runqueue has changed and p
2290 * is actually now running somewhere else!
2291 */
2292 while (task_on_cpu(rq, p)) {
2293 if (!task_state_match(p, match_state))
2294 return 0;
2295 cpu_relax();
2296 }
2297
2298 /*
2299 * Ok, time to look more closely! We need the rq
2300 * lock now, to be *sure*. If we're wrong, we'll
2301 * just go back and repeat.
2302 */
2303 rq = task_rq_lock(p, &rf);
2304 trace_sched_wait_task(p);
2305 running = task_on_cpu(rq, p);
2306 queued = task_on_rq_queued(p);
2307 ncsw = 0;
2308 if ((match = __task_state_match(p, match_state))) {
2309 /*
2310 * When matching on p->saved_state, consider this task
2311 * still queued so it will wait.
2312 */
2313 if (match < 0)
2314 queued = 1;
2315 ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
2316 }
2317 task_rq_unlock(rq, p, &rf);
2318
2319 /*
2320 * If it changed from the expected state, bail out now.
2321 */
2322 if (unlikely(!ncsw))
2323 break;
2324
2325 /*
2326 * Was it really running after all now that we
2327 * checked with the proper locks actually held?
2328 *
2329 * Oops. Go back and try again..
2330 */
2331 if (unlikely(running)) {
2332 cpu_relax();
2333 continue;
2334 }
2335
2336 /*
2337 * It's not enough that it's not actively running,
2338 * it must be off the runqueue _entirely_, and not
2339 * preempted!
2340 *
2341 * So if it was still runnable (but just not actively
2342 * running right now), it's preempted, and we should
2343 * yield - it could be a while.
2344 */
2345 if (unlikely(queued)) {
2346 ktime_t to = NSEC_PER_SEC / HZ;
2347
2348 set_current_state(TASK_UNINTERRUPTIBLE);
2349 schedule_hrtimeout(&to, HRTIMER_MODE_REL_HARD);
2350 continue;
2351 }
2352
2353 /*
2354 * Ahh, all good. It wasn't running, and it wasn't
2355 * runnable, which means that it will never become
2356 * running in the future either. We're all done!
2357 */
2358 break;
2359 }
2360
2361 return ncsw;
2362}
2363
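/*
 * [annotation, not part of core.c] Sketch of the return contract documented above:
 * wait_task_inactive() yields 0 if the task left the matching state, otherwise a
 * non-zero switch-count snapshot (the MSB is forced so the value cannot be 0), and
 * two calls returning the same value mean the task never ran in between. A toy
 * user-space model under those assumptions; names are illustrative, not kernel code.
 */
#include <limits.h>
#include <stdio.h>

struct toy_task {
	unsigned long nvcsw;	/* voluntary context switch count */
	int state_matches;	/* 1 while the task sits in a matching blocked state */
};

static unsigned long toy_wait_task_inactive(const struct toy_task *p)
{
	if (!p->state_matches)
		return 0;			/* it changed state: give up */
	return p->nvcsw | LONG_MIN;		/* "sets MSB": non-zero even if nvcsw == 0 */
}

int main(void)
{
	struct toy_task p = { .nvcsw = 42, .state_matches = 1 };
	unsigned long ncsw = toy_wait_task_inactive(&p);

	/* ... some time later ... */
	if (ncsw && ncsw == toy_wait_task_inactive(&p))
		printf("task stayed off the CPU the whole time\n");
	return 0;
}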
2216#ifdef CONFIG_SMP
2217
2218static void
2219__do_set_cpus_allowed(struct task_struct *p, struct affinity_context *ctx);
2220
2221static int __set_cpus_allowed_ptr(struct task_struct *p,
2222 struct affinity_context *ctx);
2223

--- 1112 unchanged lines hidden ---

3336 trace_sched_swap_numa(cur, arg.src_cpu, p, arg.dst_cpu);
3337 ret = stop_two_cpus(arg.dst_cpu, arg.src_cpu, migrate_swap_stop, &arg);
3338
3339out:
3340 return ret;
3341}
3342#endif /* CONFIG_NUMA_BALANCING */
3343
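/*
 * [annotation, not part of core.c] The wait_task_inactive() that follows (old lines
 * 3344-3450) is the older of the two revisions; the newer one moves it ahead of the
 * #ifdef CONFIG_SMP block (new lines 2249-2362) and extends it to consider
 * p->saved_state via __task_state_match().
 */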
3344/*
3345 * wait_task_inactive - wait for a thread to unschedule.
3346 *
3347 * Wait for the thread to block in any of the states set in @match_state.
3348 * If it changes, i.e. @p might have woken up, then return zero. When we
3349 * succeed in waiting for @p to be off its CPU, we return a positive number
3350 * (its total switch count). If a second call a short while later returns the
3351 * same number, the caller can be sure that @p has remained unscheduled the
3352 * whole time.
3353 *
3354 * The caller must ensure that the task *will* unschedule sometime soon,
3355 * else this function might spin for a *long* time. This function can't
3356 * be called with interrupts off, or it may introduce deadlock with
3357 * smp_call_function() if an IPI is sent by the same process we are
3358 * waiting to become inactive.
3359 */
3360unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state)
3361{
3362 int running, queued;
3363 struct rq_flags rf;
3364 unsigned long ncsw;
3365 struct rq *rq;
3366
3367 for (;;) {
3368 /*
3369 * We do the initial early heuristics without holding
3370 * any task-queue locks at all. We'll only try to get
3371 * the runqueue lock when things look like they will
3372 * work out!
3373 */
3374 rq = task_rq(p);
3375
3376 /*
3377 * If the task is actively running on another CPU
3378 * still, just relax and busy-wait without holding
3379 * any locks.
3380 *
3381 * NOTE! Since we don't hold any locks, it's not
3382 * even sure that "rq" stays as the right runqueue!
3383 * But we don't care, since "task_on_cpu()" will
3384 * return false if the runqueue has changed and p
3385 * is actually now running somewhere else!
3386 */
3387 while (task_on_cpu(rq, p)) {
3388 if (!(READ_ONCE(p->__state) & match_state))
3389 return 0;
3390 cpu_relax();
3391 }
3392
3393 /*
3394 * Ok, time to look more closely! We need the rq
3395 * lock now, to be *sure*. If we're wrong, we'll
3396 * just go back and repeat.
3397 */
3398 rq = task_rq_lock(p, &rf);
3399 trace_sched_wait_task(p);
3400 running = task_on_cpu(rq, p);
3401 queued = task_on_rq_queued(p);
3402 ncsw = 0;
3403 if (READ_ONCE(p->__state) & match_state)
3404 ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
3405 task_rq_unlock(rq, p, &rf);
3406
3407 /*
3408 * If it changed from the expected state, bail out now.
3409 */
3410 if (unlikely(!ncsw))
3411 break;
3412
3413 /*
3414 * Was it really running after all now that we
3415 * checked with the proper locks actually held?
3416 *
3417 * Oops. Go back and try again..
3418 */
3419 if (unlikely(running)) {
3420 cpu_relax();
3421 continue;
3422 }
3423
3424 /*
3425 * It's not enough that it's not actively running,
3426 * it must be off the runqueue _entirely_, and not
3427 * preempted!
3428 *
3429 * So if it was still runnable (but just not actively
3430 * running right now), it's preempted, and we should
3431 * yield - it could be a while.
3432 */
3433 if (unlikely(queued)) {
3434 ktime_t to = NSEC_PER_SEC / HZ;
3435
3436 set_current_state(TASK_UNINTERRUPTIBLE);
3437 schedule_hrtimeout(&to, HRTIMER_MODE_REL_HARD);
3438 continue;
3439 }
3440
3441 /*
3442 * Ahh, all good. It wasn't running, and it wasn't
3443 * runnable, which means that it will never become
3444 * running in the future either. We're all done!
3445 */
3446 break;
3447 }
3448
3449 return ncsw;
3450}
3451
3452/***
3453 * kick_process - kick a running thread to enter/exit the kernel
3454 * @p: the to-be-kicked thread
3455 *
3456 * Cause a process which is running on another CPU to enter
3457 * kernel-mode, without any delay. (to get signals handled.)
3458 *
3459 * NOTE: this function doesn't have to take the runqueue lock,

--- 538 unchanged lines hidden ---

3998 * p::saved_state, which means the code is fully serialized in both cases.
3999 *
4000 * The lock wait and lock wakeups happen via TASK_RTLOCK_WAIT. No other
4001 * bits set. This allows to distinguish all wakeup scenarios.
4002 */
4003static __always_inline
4004bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success)
4005{
4006 if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)) {
4007 WARN_ON_ONCE((state & TASK_RTLOCK_WAIT) &&
4008 state != TASK_RTLOCK_WAIT);
4009 }
4010
4011 if (READ_ONCE(p->__state) & state) {
4012 *success = 1;
4013 return true;
4014 }
4015
4016#ifdef CONFIG_PREEMPT_RT
4017 /*
4018 * Saved state preserves the task state across blocking on
4019 * an RT lock. If the state matches, set p::saved_state to
4020 * TASK_RUNNING, but do not wake the task because it waits
4021 * for a lock wakeup. Also indicate success because from
4022 * the regular waker's point of view this has succeeded.
4023 *
4024 * After acquiring the lock the task will restore p::__state
4025 * from p::saved_state which ensures that the regular
4026 * wakeup is not lost. The restore will also set
4027 * p::saved_state to TASK_RUNNING so any further tests will
4028 * not result in false positives vs. @success
4029 */
4030 if (p->saved_state & state) {
4031 p->saved_state = TASK_RUNNING;
4032 *success = 1;
4033 }
4034#endif
4035 return false;
4036}
4037
4043static __always_inline
4044bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success)
4045{
4046 int match;
4047
4048 if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)) {
4049 WARN_ON_ONCE((state & TASK_RTLOCK_WAIT) &&
4050 state != TASK_RTLOCK_WAIT);
4051 }
4052
4053 *success = !!(match = __task_state_match(p, state));
4054
4055#ifdef CONFIG_PREEMPT_RT
4056 /*
4057 * Saved state preserves the task state across blocking on
4058 * an RT lock. If the state matches, set p::saved_state to
4059 * TASK_RUNNING, but do not wake the task because it waits
4060 * for a lock wakeup. Also indicate success because from
4061 * the regular waker's point of view this has succeeded.
4062 *
4063 * After acquiring the lock the task will restore p::__state
4064 * from p::saved_state which ensures that the regular
4065 * wakeup is not lost. The restore will also set
4066 * p::saved_state to TASK_RUNNING so any further tests will
4067 * not result in false positives vs. @success
4068 */
4069 if (match < 0)
4070 p->saved_state = TASK_RUNNING;
4071#endif
4072 return match > 0;
4073}
4074
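/*
 * [annotation, not part of core.c] In the newer ttwu_state_match() above, the state
 * check is funnelled through __task_state_match(); when only p->saved_state matches
 * (match < 0), success is reported to the waker without performing a real wakeup.
 * Below is a minimal, self-contained user-space sketch of that decision; every name
 * and constant in it is an illustrative stand-in, not kernel code.
 */
#include <stdio.h>

struct toy_task {
	unsigned int state;		/* models p->__state */
	unsigned int saved_state;	/* models p->saved_state */
};

static int toy_task_state_match(const struct toy_task *p, unsigned int mask)
{
	if (p->state & mask)
		return 1;
	if (p->saved_state & mask)
		return -1;
	return 0;
}

/* Returns non-zero if the wakeup counts as successful for the waker; a real
 * wakeup is only performed when *do_wakeup comes back as 1. */
static int toy_ttwu_state_match(struct toy_task *p, unsigned int mask, int *do_wakeup)
{
	int match = toy_task_state_match(p, mask);

	if (match < 0)			/* blocked on an rtlock: only fix up saved_state */
		p->saved_state = 0;	/* 0 stands in for TASK_RUNNING */
	*do_wakeup = match > 0;
	return match != 0;
}

int main(void)
{
	struct toy_task p = { .state = 0x1000, .saved_state = 0x0002 };
	int wake;
	int ok = toy_ttwu_state_match(&p, 0x0002, &wake);

	printf("success=%d real_wakeup=%d saved_state=%#x\n", ok, wake, p.saved_state);
	return 0;
}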
4038/*
4039 * Notes on Program-Order guarantees on SMP systems.
4040 *
4041 * MIGRATION
4042 *
4043 * The basic program-order guarantee on SMP systems is that when a task [t]

--- 5499 unchanged lines hidden ---

9543 }
9544}
9545
9546void set_rq_offline(struct rq *rq)
9547{
9548 if (rq->online) {
9549 const struct sched_class *class;
9550
9551 for_each_class(class) {
9552 if (class->rq_offline)
9553 class->rq_offline(rq);
9554 }
9555
9556 cpumask_clear_cpu(rq->cpu, rq->rd->online);
9557 rq->online = 0;
9558 }

9580 }
9581}
9582
9583void set_rq_offline(struct rq *rq)
9584{
9585 if (rq->online) {
9586 const struct sched_class *class;
9587
9588 update_rq_clock(rq);
9589 for_each_class(class) {
9590 if (class->rq_offline)
9591 class->rq_offline(rq);
9592 }
9593
9594 cpumask_clear_cpu(rq->cpu, rq->rd->online);
9595 rq->online = 0;
9596 }
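/*
 * [annotation, not part of core.c] The two set_rq_offline() versions above differ
 * only in the update_rq_clock(rq) added at new line 9588, which refreshes the rq
 * clock before the class->rq_offline() callbacks run. Matching that, the caller in
 * the hunks below no longer does its own update_rq_clock(rq) (old line 9692) before
 * calling set_rq_offline().
 */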

--- 125 unchanged lines hidden ---

9684 * ttwu_queue_cond() and is_cpu_allowed().
9685 *
9686 * Do sync before park smpboot threads to take care the rcu boost case.
9687 */
9688 synchronize_rcu();
9689
9690 rq_lock_irqsave(rq, &rf);
9691 if (rq->rd) {
9692 update_rq_clock(rq);
9693 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
9694 set_rq_offline(rq);
9695 }
9696 rq_unlock_irqrestore(rq, &rf);
9697
9698#ifdef CONFIG_SCHED_SMT
9699 /*
9700 * When going down, decrement the number of cores with SMT present.

9722 * ttwu_queue_cond() and is_cpu_allowed().
9723 *
9724 * Do sync before park smpboot threads to take care the rcu boost case.
9725 */
9726 synchronize_rcu();
9727
9728 rq_lock_irqsave(rq, &rf);
9729 if (rq->rd) {
9730 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
9731 set_rq_offline(rq);
9732 }
9733 rq_unlock_irqrestore(rq, &rf);
9734
9735#ifdef CONFIG_SCHED_SMT
9736 /*
9737 * When going down, decrement the number of cores with SMT present.

--- 1786 unchanged lines hidden ---

11524
11525void call_trace_sched_update_nr_running(struct rq *rq, int count)
11526{
11527 trace_sched_update_nr_running_tp(rq, count);
11528}
11529
11530#ifdef CONFIG_SCHED_MM_CID
11531
11495/*
11496 * @cid_lock: Guarantee forward-progress of cid allocation.
11497 *
11498 * Concurrency ID allocation within a bitmap is mostly lock-free. The cid_lock
11499 * is only used when contention is detected by the lock-free allocation so
11500 * forward progress can be guaranteed.
11501 */
11502DEFINE_RAW_SPINLOCK(cid_lock);
11503
11532/**
11533 * @cid_lock: Guarantee forward-progress of cid allocation.
11534 *
11535 * Concurrency ID allocation within a bitmap is mostly lock-free. The cid_lock
11536 * is only used when contention is detected by the lock-free allocation so
11537 * forward progress can be guaranteed.
11538 */
11539DEFINE_RAW_SPINLOCK(cid_lock);
11540
11504/*
11505 * @use_cid_lock: Select cid allocation behavior: lock-free vs spinlock.
11506 *
11507 * When @use_cid_lock is 0, the cid allocation is lock-free. When contention is
11508 * detected, it is set to 1 to ensure that all newly coming allocations are
11509 * serialized by @cid_lock until the allocation which detected contention
11510 * completes and sets @use_cid_lock back to 0. This guarantees forward progress
11511 * of a cid allocation.
11512 */
11541/**
11542 * @use_cid_lock: Select cid allocation behavior: lock-free vs spinlock.
11543 *
11544 * When @use_cid_lock is 0, the cid allocation is lock-free. When contention is
11545 * detected, it is set to 1 to ensure that all newly coming allocations are
11546 * serialized by @cid_lock until the allocation which detected contention
11547 * completes and sets @use_cid_lock back to 0. This guarantees forward progress
11548 * of a cid allocation.
11549 */

--- 508 unchanged lines hidden ---
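/*
 * [annotation, not part of core.c] A self-contained user-space model of the scheme
 * both comment versions describe: allocation is attempted lock-free, and an
 * allocator that detects contention raises use_cid_lock so that allocations
 * serialize on cid_lock until the contended allocation completes, guaranteeing
 * forward progress. The names, bitmap width and retry policy below are illustrative
 * stand-ins, not the kernel's mm_cid implementation.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static pthread_mutex_t toy_cid_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_int toy_use_cid_lock;
static atomic_ulong toy_cid_bitmap;	/* bit i set == cid i in use */

/* One lock-free attempt; returns a cid, or -1 on a lost race / full bitmap. */
static int toy_try_alloc_cid(void)
{
	unsigned long old = atomic_load(&toy_cid_bitmap);

	for (int cid = 0; cid < 64; cid++) {
		if (old & (1UL << cid))
			continue;
		if (atomic_compare_exchange_strong(&toy_cid_bitmap, &old,
						   old | (1UL << cid)))
			return cid;
		return -1;	/* raced with another allocator: report contention */
	}
	return -1;
}

static int toy_alloc_cid(void)
{
	int cid;

	if (!atomic_load(&toy_use_cid_lock)) {
		cid = toy_try_alloc_cid();
		if (cid >= 0)
			return cid;		/* common case: fully lock-free */
	}

	/* Contention detected (or already serialized): take the lock so this
	 * allocation is guaranteed to make forward progress. */
	atomic_store(&toy_use_cid_lock, 1);
	pthread_mutex_lock(&toy_cid_lock);
	do {
		cid = toy_try_alloc_cid();
	} while (cid < 0);
	atomic_store(&toy_use_cid_lock, 0);
	pthread_mutex_unlock(&toy_cid_lock);
	return cid;
}

int main(void)
{
	int a = toy_alloc_cid();
	int b = toy_alloc_cid();

	printf("cid=%d cid=%d\n", a, b);
	return 0;
}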