core.c (db6da59cf27b5661ced03754ae0550f8914eda9e) | core.c (cab3ecaed5cdcc9c36a96874b4c45056a46ece45) |
---|---|
1// SPDX-License-Identifier: GPL-2.0-only 2/* 3 * kernel/sched/core.c 4 * 5 * Core kernel scheduler code and related syscalls 6 * 7 * Copyright (C) 1991-2002 Linus Torvalds 8 */ --- 2199 unchanged lines hidden (view full) --- 2208 /* 2209 * A queue event has occurred, and we're going to schedule. In 2210 * this case, we can save a useless back to back clock update. 2211 */ 2212 if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr)) 2213 rq_clock_skip_update(rq); 2214} 2215 | 1// SPDX-License-Identifier: GPL-2.0-only 2/* 3 * kernel/sched/core.c 4 * 5 * Core kernel scheduler code and related syscalls 6 * 7 * Copyright (C) 1991-2002 Linus Torvalds 8 */ --- 2199 unchanged lines hidden (view full) --- 2208 /* 2209 * A queue event has occurred, and we're going to schedule. In 2210 * this case, we can save a useless back to back clock update. 2211 */ 2212 if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr)) 2213 rq_clock_skip_update(rq); 2214} 2215 |
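The context row above ends with the clock-skip hint: when a queue event is immediately followed by schedule(), a second rq clock read would be redundant, so rq_clock_skip_update() records that the next refresh may be skipped. Below is a minimal user-space sketch of that idea only; all toy_* names are invented and this is not the kernel implementation.

```c
#include <stdbool.h>

/* Stand-ins for struct rq and its cached clock; invented for this sketch. */
struct toy_rq {
	unsigned long long clock;	/* cached timestamp */
	bool skip_update;		/* set when the next refresh is redundant */
};

static unsigned long long toy_read_clock(void)
{
	static unsigned long long t;	/* stand-in for a real clocksource */
	return ++t;
}

/* Analogue of rq_clock_skip_update(): remember that schedule() follows. */
static void toy_rq_clock_skip_update(struct toy_rq *rq)
{
	rq->skip_update = true;
}

/* Analogue of update_rq_clock(): honour the hint once, else refresh. */
static void toy_update_rq_clock(struct toy_rq *rq)
{
	if (rq->skip_update) {
		rq->skip_update = false;
		return;			/* keep the value read moments ago */
	}
	rq->clock = toy_read_clock();
}
```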
2216static __always_inline 2217int __task_state_match(struct task_struct *p, unsigned int state) 2218{ 2219 if (READ_ONCE(p->__state) & state) 2220 return 1; 2221 2222#ifdef CONFIG_PREEMPT_RT 2223 if (READ_ONCE(p->saved_state) & state) 2224 return -1; 2225#endif 2226 return 0; 2227} 2228 2229static __always_inline 2230int task_state_match(struct task_struct *p, unsigned int state) 2231{ 2232#ifdef CONFIG_PREEMPT_RT 2233 int match; 2234 2235 /* 2236 * Serialize against current_save_and_set_rtlock_wait_state() and 2237 * current_restore_rtlock_saved_state(). 2238 */ 2239 raw_spin_lock_irq(&p->pi_lock); 2240 match = __task_state_match(p, state); 2241 raw_spin_unlock_irq(&p->pi_lock); 2242 2243 return match; 2244#else 2245 return __task_state_match(p, state); 2246#endif 2247} 2248 2249/* 2250 * wait_task_inactive - wait for a thread to unschedule. 2251 * 2252 * Wait for the thread to block in any of the states set in @match_state. 2253 * If it changes, i.e. @p might have woken up, then return zero. When we 2254 * succeed in waiting for @p to be off its CPU, we return a positive number 2255 * (its total switch count). If a second call a short while later returns the 2256 * same number, the caller can be sure that @p has remained unscheduled the 2257 * whole time. 2258 * 2259 * The caller must ensure that the task *will* unschedule sometime soon, 2260 * else this function might spin for a *long* time. This function can't 2261 * be called with interrupts off, or it may introduce deadlock with 2262 * smp_call_function() if an IPI is sent by the same process we are 2263 * waiting to become inactive. 2264 */ 2265unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state) 2266{ 2267 int running, queued, match; 2268 struct rq_flags rf; 2269 unsigned long ncsw; 2270 struct rq *rq; 2271 2272 for (;;) { 2273 /* 2274 * We do the initial early heuristics without holding 2275 * any task-queue locks at all. We'll only try to get 2276 * the runqueue lock when things look like they will 2277 * work out! 2278 */ 2279 rq = task_rq(p); 2280 2281 /* 2282 * If the task is actively running on another CPU 2283 * still, just relax and busy-wait without holding 2284 * any locks. 2285 * 2286 * NOTE! Since we don't hold any locks, it's not 2287 * even sure that "rq" stays as the right runqueue! 2288 * But we don't care, since "task_on_cpu()" will 2289 * return false if the runqueue has changed and p 2290 * is actually now running somewhere else! 2291 */ 2292 while (task_on_cpu(rq, p)) { 2293 if (!task_state_match(p, match_state)) 2294 return 0; 2295 cpu_relax(); 2296 } 2297 2298 /* 2299 * Ok, time to look more closely! We need the rq 2300 * lock now, to be *sure*. If we're wrong, we'll 2301 * just go back and repeat. 2302 */ 2303 rq = task_rq_lock(p, &rf); 2304 trace_sched_wait_task(p); 2305 running = task_on_cpu(rq, p); 2306 queued = task_on_rq_queued(p); 2307 ncsw = 0; 2308 if ((match = __task_state_match(p, match_state))) { 2309 /* 2310 * When matching on p->saved_state, consider this task 2311 * still queued so it will wait. 2312 */ 2313 if (match < 0) 2314 queued = 1; 2315 ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ 2316 } 2317 task_rq_unlock(rq, p, &rf); 2318 2319 /* 2320 * If it changed from the expected state, bail out now. 2321 */ 2322 if (unlikely(!ncsw)) 2323 break; 2324 2325 /* 2326 * Was it really running after all now that we 2327 * checked with the proper locks actually held? 2328 * 2329 * Oops. Go back and try again.. 
2330 */ 2331 if (unlikely(running)) { 2332 cpu_relax(); 2333 continue; 2334 } 2335 2336 /* 2337 * It's not enough that it's not actively running, 2338 * it must be off the runqueue _entirely_, and not 2339 * preempted! 2340 * 2341 * So if it was still runnable (but just not actively 2342 * running right now), it's preempted, and we should 2343 * yield - it could be a while. 2344 */ 2345 if (unlikely(queued)) { 2346 ktime_t to = NSEC_PER_SEC / HZ; 2347 2348 set_current_state(TASK_UNINTERRUPTIBLE); 2349 schedule_hrtimeout(&to, HRTIMER_MODE_REL_HARD); 2350 continue; 2351 } 2352 2353 /* 2354 * Ahh, all good. It wasn't running, and it wasn't 2355 * runnable, which means that it will never become 2356 * running in the future either. We're all done! 2357 */ 2358 break; 2359 } 2360 2361 return ncsw; 2362} 2363 |
|
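The block added above introduces a tri-state convention: __task_state_match() returns 1 when p->__state matches the requested mask, -1 when only p->saved_state matches (PREEMPT_RT only), and 0 otherwise; the relocated wait_task_inactive() treats a negative result as "still blocked, keep waiting". A minimal user-space sketch of that convention follows, with invented toy_* names (not kernel code).

```c
#include <stdio.h>

struct toy_task {
	unsigned int state;		/* stands in for p->__state */
	unsigned int saved_state;	/* stands in for p->saved_state (RT only) */
};

static int toy_state_match(const struct toy_task *t, unsigned int mask)
{
	if (t->state & mask)
		return 1;		/* real state matches */
	if (t->saved_state & mask)
		return -1;		/* only the saved state matches */
	return 0;			/* neither matches */
}

int main(void)
{
	struct toy_task t = { .state = 0x0, .saved_state = 0x2 };
	int match = toy_state_match(&t, 0x2);

	if (match) {
		/* Mirror of the wait_task_inactive() logic above: a saved-state
		 * match (< 0) is treated as "still blocked, keep waiting". */
		printf("match=%d -> %s\n", match,
		       match < 0 ? "saved state only, treat as queued"
				 : "real state matched");
	}
	return 0;
}
```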
2216#ifdef CONFIG_SMP 2217 2218static void 2219__do_set_cpus_allowed(struct task_struct *p, struct affinity_context *ctx); 2220 2221static int __set_cpus_allowed_ptr(struct task_struct *p, 2222 struct affinity_context *ctx); 2223 --- 1112 unchanged lines hidden (view full) --- 3336 trace_sched_swap_numa(cur, arg.src_cpu, p, arg.dst_cpu); 3337 ret = stop_two_cpus(arg.dst_cpu, arg.src_cpu, migrate_swap_stop, &arg); 3338 3339out: 3340 return ret; 3341} 3342#endif /* CONFIG_NUMA_BALANCING */ 3343 | 2364#ifdef CONFIG_SMP 2365 2366static void 2367__do_set_cpus_allowed(struct task_struct *p, struct affinity_context *ctx); 2368 2369static int __set_cpus_allowed_ptr(struct task_struct *p, 2370 struct affinity_context *ctx); 2371 --- 1112 unchanged lines hidden (view full) --- 3484 trace_sched_swap_numa(cur, arg.src_cpu, p, arg.dst_cpu); 3485 ret = stop_two_cpus(arg.dst_cpu, arg.src_cpu, migrate_swap_stop, &arg); 3486 3487out: 3488 return ret; 3489} 3490#endif /* CONFIG_NUMA_BALANCING */ 3491 |
3344/* 3345 * wait_task_inactive - wait for a thread to unschedule. 3346 * 3347 * Wait for the thread to block in any of the states set in @match_state. 3348 * If it changes, i.e. @p might have woken up, then return zero. When we 3349 * succeed in waiting for @p to be off its CPU, we return a positive number 3350 * (its total switch count). If a second call a short while later returns the 3351 * same number, the caller can be sure that @p has remained unscheduled the 3352 * whole time. 3353 * 3354 * The caller must ensure that the task *will* unschedule sometime soon, 3355 * else this function might spin for a *long* time. This function can't 3356 * be called with interrupts off, or it may introduce deadlock with 3357 * smp_call_function() if an IPI is sent by the same process we are 3358 * waiting to become inactive. 3359 */ 3360unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state) 3361{ 3362 int running, queued; 3363 struct rq_flags rf; 3364 unsigned long ncsw; 3365 struct rq *rq; 3366 3367 for (;;) { 3368 /* 3369 * We do the initial early heuristics without holding 3370 * any task-queue locks at all. We'll only try to get 3371 * the runqueue lock when things look like they will 3372 * work out! 3373 */ 3374 rq = task_rq(p); 3375 3376 /* 3377 * If the task is actively running on another CPU 3378 * still, just relax and busy-wait without holding 3379 * any locks. 3380 * 3381 * NOTE! Since we don't hold any locks, it's not 3382 * even sure that "rq" stays as the right runqueue! 3383 * But we don't care, since "task_on_cpu()" will 3384 * return false if the runqueue has changed and p 3385 * is actually now running somewhere else! 3386 */ 3387 while (task_on_cpu(rq, p)) { 3388 if (!(READ_ONCE(p->__state) & match_state)) 3389 return 0; 3390 cpu_relax(); 3391 } 3392 3393 /* 3394 * Ok, time to look more closely! We need the rq 3395 * lock now, to be *sure*. If we're wrong, we'll 3396 * just go back and repeat. 3397 */ 3398 rq = task_rq_lock(p, &rf); 3399 trace_sched_wait_task(p); 3400 running = task_on_cpu(rq, p); 3401 queued = task_on_rq_queued(p); 3402 ncsw = 0; 3403 if (READ_ONCE(p->__state) & match_state) 3404 ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ 3405 task_rq_unlock(rq, p, &rf); 3406 3407 /* 3408 * If it changed from the expected state, bail out now. 3409 */ 3410 if (unlikely(!ncsw)) 3411 break; 3412 3413 /* 3414 * Was it really running after all now that we 3415 * checked with the proper locks actually held? 3416 * 3417 * Oops. Go back and try again.. 3418 */ 3419 if (unlikely(running)) { 3420 cpu_relax(); 3421 continue; 3422 } 3423 3424 /* 3425 * It's not enough that it's not actively running, 3426 * it must be off the runqueue _entirely_, and not 3427 * preempted! 3428 * 3429 * So if it was still runnable (but just not actively 3430 * running right now), it's preempted, and we should 3431 * yield - it could be a while. 3432 */ 3433 if (unlikely(queued)) { 3434 ktime_t to = NSEC_PER_SEC / HZ; 3435 3436 set_current_state(TASK_UNINTERRUPTIBLE); 3437 schedule_hrtimeout(&to, HRTIMER_MODE_REL_HARD); 3438 continue; 3439 } 3440 3441 /* 3442 * Ahh, all good. It wasn't running, and it wasn't 3443 * runnable, which means that it will never become 3444 * running in the future either. We're all done! 3445 */ 3446 break; 3447 } 3448 3449 return ncsw; 3450} 3451 | |
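The kernel-doc above (unchanged apart from the function's new location and the state check) describes the intended calling pattern: a non-zero return is the task's voluntary context-switch count with the MSB set, and getting the same value from a second call a short while later proves the task never got back on a CPU in between. Here is a kernel-style sketch of that pattern, not taken from the tree; task_settled() is an invented helper, and the caller is assumed to guarantee that @p will in fact unschedule, as the comment requires.

```c
#include <linux/delay.h>
#include <linux/sched.h>

static bool task_settled(struct task_struct *p)
{
	unsigned long ncsw, again;

	ncsw = wait_task_inactive(p, TASK_UNINTERRUPTIBLE);
	if (!ncsw)		/* state changed: @p may have woken up */
		return false;

	msleep(1);		/* "a short while later", per the comment */

	again = wait_task_inactive(p, TASK_UNINTERRUPTIBLE);
	return again == ncsw;	/* same switch count: never ran in between */
}
```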
3452/*** 3453 * kick_process - kick a running thread to enter/exit the kernel 3454 * @p: the to-be-kicked thread 3455 * 3456 * Cause a process which is running on another CPU to enter 3457 * kernel-mode, without any delay. (to get signals handled.) 3458 * 3459 * NOTE: this function doesn't have to take the runqueue lock, --- 538 unchanged lines hidden (view full) --- 3998 * p::saved_state, which means the code is fully serialized in both cases. 3999 * 4000 * The lock wait and lock wakeups happen via TASK_RTLOCK_WAIT. No other 4001 * bits set. This allows to distinguish all wakeup scenarios. 4002 */ 4003static __always_inline 4004bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success) 4005{ | 3492/*** 3493 * kick_process - kick a running thread to enter/exit the kernel 3494 * @p: the to-be-kicked thread 3495 * 3496 * Cause a process which is running on another CPU to enter 3497 * kernel-mode, without any delay. (to get signals handled.) 3498 * 3499 * NOTE: this function doesn't have to take the runqueue lock, --- 538 unchanged lines hidden (view full) --- 4038 * p::saved_state, which means the code is fully serialized in both cases. 4039 * 4040 * The lock wait and lock wakeups happen via TASK_RTLOCK_WAIT. No other 4041 * bits set. This allows to distinguish all wakeup scenarios. 4042 */ 4043static __always_inline 4044bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success) 4045{ |
4046 int match; 4047 | |
4006 if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)) { 4007 WARN_ON_ONCE((state & TASK_RTLOCK_WAIT) && 4008 state != TASK_RTLOCK_WAIT); 4009 } 4010 | 4048 if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)) { 4049 WARN_ON_ONCE((state & TASK_RTLOCK_WAIT) && 4050 state != TASK_RTLOCK_WAIT); 4051 } 4052 |
4011 if (READ_ONCE(p->__state) & state) { 4012 *success = 1; 4013 return true; 4014 } | 4053 *success = !!(match = __task_state_match(p, state)); |
4015 4016#ifdef CONFIG_PREEMPT_RT 4017 /* 4018 * Saved state preserves the task state across blocking on 4019 * an RT lock. If the state matches, set p::saved_state to 4020 * TASK_RUNNING, but do not wake the task because it waits 4021 * for a lock wakeup. Also indicate success because from 4022 * the regular waker's point of view this has succeeded. 4023 * 4024 * After acquiring the lock the task will restore p::__state 4025 * from p::saved_state which ensures that the regular 4026 * wakeup is not lost. The restore will also set 4027 * p::saved_state to TASK_RUNNING so any further tests will 4028 * not result in false positives vs. @success 4029 */ | 4054 4055#ifdef CONFIG_PREEMPT_RT 4056 /* 4057 * Saved state preserves the task state across blocking on 4058 * an RT lock. If the state matches, set p::saved_state to 4059 * TASK_RUNNING, but do not wake the task because it waits 4060 * for a lock wakeup. Also indicate success because from 4061 * the regular waker's point of view this has succeeded. 4062 * 4063 * After acquiring the lock the task will restore p::__state 4064 * from p::saved_state which ensures that the regular 4065 * wakeup is not lost. The restore will also set 4066 * p::saved_state to TASK_RUNNING so any further tests will 4067 * not result in false positives vs. @success 4068 */ |
4030 if (p->saved_state & state) { | 4069 if (match < 0) |
4031 p->saved_state = TASK_RUNNING; | 4070 p->saved_state = TASK_RUNNING; |
4032 *success = 1; 4033 } | |
4034#endif | 4071#endif |
4035 return false; | 4072 return match > 0; |
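Taken together, the right-hand hunks rewrite ttwu_state_match() around the shared helper: any non-zero __task_state_match() result reports success to the waker, but only a real p->__state match lets the wakeup proceed, while a saved-state match (PREEMPT_RT) merely sets p->saved_state to TASK_RUNNING so the wakeup is not lost once the task restores its state after the RT lock. A user-space sketch of that decision table, with invented toy_* names:

```c
#include <stdbool.h>
#include <stdio.h>

/*
 * match > 0: p->__state matched; match < 0: only the saved state matched
 * (PREEMPT_RT); match == 0: no match.  Mirrors the return convention of
 * __task_state_match() added earlier in this diff.
 */
static bool toy_ttwu_state_match(int match, int *success, bool *fixup_saved)
{
	*success = !!match;		/* the waker sees success for either match */
	*fixup_saved = match < 0;	/* mark saved state running, don't wake yet */
	return match > 0;		/* proceed with the wakeup only here */
}

int main(void)
{
	static const int cases[] = { 1, 0, -1 };

	for (unsigned int i = 0; i < 3; i++) {
		int success;
		bool fixup;
		bool wake = toy_ttwu_state_match(cases[i], &success, &fixup);

		printf("match=%2d success=%d fixup_saved=%d wake=%d\n",
		       cases[i], success, fixup, wake);
	}
	return 0;
}
```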
4036} 4037 4038/* 4039 * Notes on Program-Order guarantees on SMP systems. 4040 * 4041 * MIGRATION 4042 * 4043 * The basic program-order guarantee on SMP systems is that when a task [t] --- 5499 unchanged lines hidden (view full) --- 9543 } 9544} 9545 9546void set_rq_offline(struct rq *rq) 9547{ 9548 if (rq->online) { 9549 const struct sched_class *class; 9550 | 4073} 4074 4075/* 4076 * Notes on Program-Order guarantees on SMP systems. 4077 * 4078 * MIGRATION 4079 * 4080 * The basic program-order guarantee on SMP systems is that when a task [t] --- 5499 unchanged lines hidden (view full) --- 9580 } 9581} 9582 9583void set_rq_offline(struct rq *rq) 9584{ 9585 if (rq->online) { 9586 const struct sched_class *class; 9587 |
9588 update_rq_clock(rq); |
9551 for_each_class(class) { 9552 if (class->rq_offline) 9553 class->rq_offline(rq); 9554 } 9555 9556 cpumask_clear_cpu(rq->cpu, rq->rd->online); 9557 rq->online = 0; 9558 } --- 125 unchanged lines hidden (view full) --- 9684 * ttwu_queue_cond() and is_cpu_allowed(). 9685 * 9686 * Do sync before park smpboot threads to take care the rcu boost case. 9687 */ 9688 synchronize_rcu(); 9689 9690 rq_lock_irqsave(rq, &rf); 9691 if (rq->rd) { | 9589 for_each_class(class) { 9590 if (class->rq_offline) 9591 class->rq_offline(rq); 9592 } 9593 9594 cpumask_clear_cpu(rq->cpu, rq->rd->online); 9595 rq->online = 0; 9596 } --- 125 unchanged lines hidden (view full) --- 9722 * ttwu_queue_cond() and is_cpu_allowed(). 9723 * 9724 * Do sync before park smpboot threads to take care the rcu boost case. 9725 */ 9726 synchronize_rcu(); 9727 9728 rq_lock_irqsave(rq, &rf); 9729 if (rq->rd) { |
9692 update_rq_clock(rq); | |
9693 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); 9694 set_rq_offline(rq); 9695 } 9696 rq_unlock_irqrestore(rq, &rf); 9697 9698#ifdef CONFIG_SCHED_SMT 9699 /* 9700 * When going down, decrement the number of cores with SMT present. --- 1786 unchanged lines hidden (view full) --- 11487 11488void call_trace_sched_update_nr_running(struct rq *rq, int count) 11489{ 11490 trace_sched_update_nr_running_tp(rq, count); 11491} 11492 11493#ifdef CONFIG_SCHED_MM_CID 11494 | 9730 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); 9731 set_rq_offline(rq); 9732 } 9733 rq_unlock_irqrestore(rq, &rf); 9734 9735#ifdef CONFIG_SCHED_SMT 9736 /* 9737 * When going down, decrement the number of cores with SMT present. --- 1786 unchanged lines hidden (view full) --- 11524 11525void call_trace_sched_update_nr_running(struct rq *rq, int count) 11526{ 11527 trace_sched_update_nr_running_tp(rq, count); 11528} 11529 11530#ifdef CONFIG_SCHED_MM_CID 11531 |
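The two rq-offline hunks above relocate the update_rq_clock() call from the CPU-offline caller into set_rq_offline() itself, so any path that offlines a runqueue under the rq lock gets a freshly updated clock instead of relying on the call site to remember it. A toy illustration of that "refresh in the callee" refactor (demo_* names invented, not kernel code):

```c
#include <stdbool.h>

struct demo_rq {
	unsigned long long clock;
	bool online;
};

static unsigned long long demo_read_clock(void)
{
	static unsigned long long t;	/* stand-in for a real clocksource */
	return ++t;
}

/* After the change: the offline routine refreshes the clock itself. */
static void demo_set_rq_offline(struct demo_rq *rq)
{
	if (rq->online) {
		rq->clock = demo_read_clock();	/* previously the caller's job */
		rq->online = false;
	}
}

/* The caller no longer needs its own clock refresh before the call. */
static void demo_cpu_offline(struct demo_rq *rq)
{
	demo_set_rq_offline(rq);
}
```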
11495/* | 11532/** |
11496 * @cid_lock: Guarantee forward-progress of cid allocation. 11497 * 11498 * Concurrency ID allocation within a bitmap is mostly lock-free. The cid_lock 11499 * is only used when contention is detected by the lock-free allocation so 11500 * forward progress can be guaranteed. 11501 */ 11502DEFINE_RAW_SPINLOCK(cid_lock); 11503 | 11533 * @cid_lock: Guarantee forward-progress of cid allocation. 11534 * 11535 * Concurrency ID allocation within a bitmap is mostly lock-free. The cid_lock 11536 * is only used when contention is detected by the lock-free allocation so 11537 * forward progress can be guaranteed. 11538 */ 11539DEFINE_RAW_SPINLOCK(cid_lock); 11540 |
11504/* | 11541/** |
11505 * @use_cid_lock: Select cid allocation behavior: lock-free vs spinlock. 11506 * 11507 * When @use_cid_lock is 0, the cid allocation is lock-free. When contention is 11508 * detected, it is set to 1 to ensure that all newly coming allocations are 11509 * serialized by @cid_lock until the allocation which detected contention 11510 * completes and sets @use_cid_lock back to 0. This guarantees forward progress 11511 * of a cid allocation. 11512 */ --- 508 unchanged lines hidden --- | 11542 * @use_cid_lock: Select cid allocation behavior: lock-free vs spinlock. 11543 * 11544 * When @use_cid_lock is 0, the cid allocation is lock-free. When contention is 11545 * detected, it is set to 1 to ensure that all newly coming allocations are 11546 * serialized by @cid_lock until the allocation which detected contention 11547 * completes and sets @use_cid_lock back to 0. This guarantees forward progress 11548 * of a cid allocation. 11549 */ --- 508 unchanged lines hidden --- |
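The two comments above describe a forward-progress scheme: concurrency-ID allocation stays lock-free until an allocator detects contention, at which point it takes @cid_lock and flips @use_cid_lock so that newcomers serialize behind it until the contended allocation completes. Below is a generic user-space sketch of that shape, using C11 atomics and a pthread mutex standing in for the raw spinlock; none of this is the kernel's mm_cid code and all names are invented.

```c
#include <pthread.h>
#include <stdatomic.h>

#define NR_IDS 32

static atomic_ulong id_bitmap;			/* bit set => id in use */
static atomic_int use_lock;			/* 1 => newcomers must serialize */
static pthread_mutex_t fallback_lock = PTHREAD_MUTEX_INITIALIZER;

/* Try to claim the first free bit; -1 means we raced, -2 means full. */
static int scan_and_claim(void)
{
	unsigned long map = atomic_load(&id_bitmap);

	for (int id = 0; id < NR_IDS; id++) {
		if (map & (1UL << id))
			continue;
		if (!(atomic_fetch_or(&id_bitmap, 1UL << id) & (1UL << id)))
			return id;	/* we set the bit first */
		return -1;		/* another allocator beat us: contention */
	}
	return -2;
}

static int alloc_id(void)
{
	int id;

	/* Fast path: stay lock-free while nobody has reported contention. */
	if (!atomic_load(&use_lock)) {
		id = scan_and_claim();
		if (id != -1)
			return id;	/* success, or genuinely full */
	}

	/* Slow path: serialize so the contended allocation makes progress. */
	pthread_mutex_lock(&fallback_lock);
	atomic_store(&use_lock, 1);
	do {
		id = scan_and_claim();	/* every failed retry means another
					 * allocator claimed a bit, i.e. progress */
	} while (id == -1);
	atomic_store(&use_lock, 0);	/* back to lock-free behaviour */
	pthread_mutex_unlock(&fallback_lock);
	return id;
}

static void free_id(int id)
{
	atomic_fetch_and(&id_bitmap, ~(1UL << id));
}
```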