/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Scheduler internal types and methods:
 */
#include <linux/sched.h>
#include <linux/sched/autogroup.h>
#include <linux/sched/sysctl.h>
#include <linux/sched/topology.h>
#include <linux/sched/rt.h>
#include <linux/sched/deadline.h>
#include <linux/sched/clock.h>
#include <linux/sched/wake_q.h>
#include <linux/sched/signal.h>
#include <linux/sched/numa_balancing.h>
#include <linux/sched/mm.h>
#include <linux/sched/cpufreq.h>
#include <linux/sched/stat.h>
#include <linux/sched/nohz.h>
#include <linux/sched/debug.h>
#include <linux/sched/hotplug.h>
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/cputime.h>
#include <linux/sched/init.h>

#include <linux/u64_stats_sync.h>
#include <linux/kernel_stat.h>
#include <linux/binfmts.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
#include <linux/stop_machine.h>
#include <linux/irq_work.h>
#include <linux/tick.h>
#include <linux/slab.h>
#include <linux/cgroup.h>

#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#endif

#include "cpupri.h"
#include "cpudeadline.h"

#ifdef CONFIG_SCHED_DEBUG
# define SCHED_WARN_ON(x)	WARN_ONCE(x, #x)
#else
# define SCHED_WARN_ON(x)	({ (void)(x), 0; })
#endif

struct rq;
struct cpuidle_state;

/* task_struct::on_rq states: */
#define TASK_ON_RQ_QUEUED	1
#define TASK_ON_RQ_MIGRATING	2

extern __read_mostly int scheduler_running;

extern unsigned long calc_load_update;
extern atomic_long_t calc_load_tasks;

extern void calc_global_load_tick(struct rq *this_rq);
extern long calc_load_fold_active(struct rq *this_rq, long adjust);

#ifdef CONFIG_SMP
extern void cpu_load_update_active(struct rq *this_rq);
#else
static inline void cpu_load_update_active(struct rq *this_rq) { }
#endif

/*
 * Helpers for converting nanosecond timing to jiffy resolution
 */
#define NS_TO_JIFFIES(TIME)	((unsigned long)(TIME) / (NSEC_PER_SEC / HZ))

/*
 * Increase resolution of nice-level calculations for 64-bit architectures.
 * The extra resolution improves shares distribution and load balancing of
 * low-weight task groups (eg. nice +19 on an autogroup), deeper taskgroup
 * hierarchies, especially on larger systems. This is not a user-visible change
 * and does not change the user-interface for setting shares/weights.
 *
 * We increase resolution only if we have enough bits to allow this increased
 * resolution (i.e. 64-bit). The costs for increasing resolution when 32-bit
 * are pretty high and the returns do not justify the increased costs.
 *
 * Really only required when CONFIG_FAIR_GROUP_SCHED=y is also set, but to
 * increase coverage and consistency always enable it on 64-bit platforms.
 */
#ifdef CONFIG_64BIT
# define NICE_0_LOAD_SHIFT	(SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT)
# define scale_load(w)		((w) << SCHED_FIXEDPOINT_SHIFT)
# define scale_load_down(w)	((w) >> SCHED_FIXEDPOINT_SHIFT)
#else
# define NICE_0_LOAD_SHIFT	(SCHED_FIXEDPOINT_SHIFT)
# define scale_load(w)		(w)
# define scale_load_down(w)	(w)
#endif

/*
 * Task weight (visible to users) and its load (invisible to users) have
 * independent resolution, but they should be well calibrated. We use
 * scale_load() and scale_load_down(w) to convert between them. The
 * following must be true:
 *
 *	scale_load(sched_prio_to_weight[USER_PRIO(NICE_TO_PRIO(0))]) == NICE_0_LOAD
 *
 */
#define NICE_0_LOAD		(1L << NICE_0_LOAD_SHIFT)

/*
 * Single value that decides SCHED_DEADLINE internal math precision.
 * 10 -> just above 1us
 *  9 -> just above 0.5us
 */
#define DL_SCALE		10

/*
 * Single value that denotes runtime == period, ie unlimited time.
 */
#define RUNTIME_INF		((u64)~0ULL)
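
/*
 * Illustrative arithmetic for the helpers above (assuming HZ=1000 and
 * SCHED_FIXEDPOINT_SHIFT == 10 on a 64-bit build; values shown only as an
 * example, the real ones depend on the kernel configuration):
 *
 *	NS_TO_JIFFIES(4000000)   == 4000000 / (1000000000 / 1000) == 4
 *	scale_load(1024)         == 1024 << 10 == 1048576 == NICE_0_LOAD
 *	scale_load_down(1048576) == 1048576 >> 10 == 1024
 *
 * i.e. the user-visible nice-0 weight (1024) round-trips through the
 * higher-resolution internal load representation.
 */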

static inline int idle_policy(int policy)
{
	return policy == SCHED_IDLE;
}
static inline int fair_policy(int policy)
{
	return policy == SCHED_NORMAL || policy == SCHED_BATCH;
}

static inline int rt_policy(int policy)
{
	return policy == SCHED_FIFO || policy == SCHED_RR;
}

static inline int dl_policy(int policy)
{
	return policy == SCHED_DEADLINE;
}
static inline bool valid_policy(int policy)
{
	return idle_policy(policy) || fair_policy(policy) ||
		rt_policy(policy) || dl_policy(policy);
}

static inline int task_has_rt_policy(struct task_struct *p)
{
	return rt_policy(p->policy);
}

static inline int task_has_dl_policy(struct task_struct *p)
{
	return dl_policy(p->policy);
}

#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)

/*
 * !! For sched_setattr_nocheck() (kernel) only !!
 *
 * This is actually gross. :(
 *
 * It is used to make schedutil kworker(s) higher priority than SCHED_DEADLINE
 * tasks, but still be able to sleep. We need this on platforms that cannot
 * atomically change clock frequency. Remove once fast switching is available
 * on such platforms.
 *
 * SUGOV stands for SchedUtil GOVernor.
 */
#define SCHED_FLAG_SUGOV	0x10000000

static inline bool dl_entity_is_special(struct sched_dl_entity *dl_se)
{
#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
	return unlikely(dl_se->flags & SCHED_FLAG_SUGOV);
#else
	return false;
#endif
}

/*
 * Tells if entity @a should preempt entity @b.
 */
static inline bool
dl_entity_preempt(struct sched_dl_entity *a, struct sched_dl_entity *b)
{
	return dl_entity_is_special(a) ||
	       dl_time_before(a->deadline, b->deadline);
}
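
/*
 * Illustrative EDF ordering for the helper above (deadlines are made up):
 * with absolute deadlines a->deadline == 150ms and b->deadline == 200ms,
 * and neither entity being a SUGOV "special" one, dl_entity_preempt(a, b)
 * is true because @a's deadline is earlier; a special (SUGOV) entity
 * preempts regardless of its deadline.
 */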

/*
 * This is the priority-queue data structure of the RT scheduling class:
 */
struct rt_prio_array {
	DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* include 1 bit for delimiter */
	struct list_head queue[MAX_RT_PRIO];
};

struct rt_bandwidth {
	/* nests inside the rq lock: */
	raw_spinlock_t		rt_runtime_lock;
	ktime_t			rt_period;
	u64			rt_runtime;
	struct hrtimer		rt_period_timer;
	unsigned int		rt_period_active;
};

void __dl_clear_params(struct task_struct *p);
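
/*
 * For context: rt_period/rt_runtime implement RT throttling. With the
 * default sysctls (sched_rt_period_us = 1000000, sched_rt_runtime_us =
 * 950000) realtime tasks may consume at most 950ms of CPU time in every
 * 1s period; rt_period_timer replenishes the budget at each period
 * boundary. Figures are the tunable defaults, shown for illustration.
 */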

/*
 * To keep the bandwidth of -deadline tasks and groups under control
 * we need some place where:
 *  - store the maximum -deadline bandwidth of the system (the group);
 *  - cache the fraction of that bandwidth that is currently allocated.
 *
 * This is all done in the data structure below. It is similar to the
 * one used for RT-throttling (rt_bandwidth), with the main difference
 * that, since here we are only interested in admission control, we
 * do not decrease any runtime while the group "executes", nor do we
 * need a timer to replenish it.
 *
 * With respect to SMP, the bandwidth is given on a per-CPU basis,
 * meaning that:
 *  - dl_bw (< 100%) is the bandwidth of the system (group) on each CPU;
 *  - dl_total_bw array contains, in the i-th element, the currently
 *    allocated bandwidth on the i-th CPU.
 * Moreover, groups consume bandwidth on each CPU, while tasks only
 * consume bandwidth on the CPU they're running on.
 * Finally, dl_total_bw_cpu is used to cache the index of dl_total_bw
 * that will be shown the next time the proc or cgroup controls will
 * be read. It in turn can be changed by writing on its own control.
 */
struct dl_bandwidth {
	raw_spinlock_t		dl_runtime_lock;
	u64			dl_runtime;
	u64			dl_period;
};

static inline int dl_bandwidth_enabled(void)
{
	return sysctl_sched_rt_runtime >= 0;
}

struct dl_bw {
	raw_spinlock_t		lock;
	u64			bw;
	u64			total_bw;
};

static inline void __dl_update(struct dl_bw *dl_b, s64 bw);

static inline
void __dl_sub(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
{
	dl_b->total_bw -= tsk_bw;
	__dl_update(dl_b, (s32)tsk_bw / cpus);
}

static inline
void __dl_add(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
{
	dl_b->total_bw += tsk_bw;
	__dl_update(dl_b, -((s32)tsk_bw / cpus));
}

static inline
bool __dl_overflow(struct dl_bw *dl_b, int cpus, u64 old_bw, u64 new_bw)
{
	return dl_b->bw != -1 &&
	       dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw;
}

extern void dl_change_utilization(struct task_struct *p, u64 new_bw);
extern void init_dl_bw(struct dl_bw *dl_b);
extern int  sched_dl_global_validate(void);
extern void sched_dl_do_global(void);
extern int  sched_dl_overflow(struct task_struct *p, int policy, const struct sched_attr *attr);
extern void __setparam_dl(struct task_struct *p, const struct sched_attr *attr);
extern void __getparam_dl(struct task_struct *p, struct sched_attr *attr);
extern bool __checkparam_dl(const struct sched_attr *attr);
extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr);
extern int  dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed);
extern int  dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial);
extern bool dl_cpu_busy(unsigned int cpu);

#ifdef CONFIG_CGROUP_SCHED

#include <linux/cgroup.h>

struct cfs_rq;
struct rt_rq;

extern struct list_head task_groups;

struct cfs_bandwidth {
#ifdef CONFIG_CFS_BANDWIDTH
	raw_spinlock_t		lock;
	ktime_t			period;
	u64			quota;
	u64			runtime;
	s64			hierarchical_quota;
	u64			runtime_expires;

	int			idle;
	int			period_active;
	struct hrtimer		period_timer;
	struct hrtimer		slack_timer;
	struct list_head	throttled_cfs_rq;

	/* Statistics: */
	int			nr_periods;
	int			nr_throttled;
	u64			throttled_time;
#endif
};
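
/*
 * Example of how quota/period bound a group (figures are illustrative,
 * assuming CONFIG_CFS_BANDWIDTH): with period = 100ms and quota = 50ms the
 * group's cfs_rqs may run for at most 50ms of CPU time per 100ms window;
 * once runtime is exhausted, throttled cfs_rqs sit on throttled_cfs_rq
 * until period_timer refills the quota. quota == RUNTIME_INF means no
 * limit. The actual limits come from cpu.cfs_quota_us/cpu.cfs_period_us.
 */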

/* Task group related information */
struct task_group {
	struct cgroup_subsys_state css;

#ifdef CONFIG_FAIR_GROUP_SCHED
	/* schedulable entities of this group on each CPU */
	struct sched_entity	**se;
	/* runqueue "owned" by this group on each CPU */
	struct cfs_rq		**cfs_rq;
	unsigned long		shares;

#ifdef CONFIG_SMP
	/*
	 * load_avg can be heavily contended at clock tick time, so put
	 * it in its own cacheline separated from the fields above which
	 * will also be accessed at each tick.
	 */
	atomic_long_t		load_avg ____cacheline_aligned;
#endif
#endif

#ifdef CONFIG_RT_GROUP_SCHED
	struct sched_rt_entity	**rt_se;
	struct rt_rq		**rt_rq;

	struct rt_bandwidth	rt_bandwidth;
#endif

	struct rcu_head		rcu;
	struct list_head	list;

	struct task_group	*parent;
	struct list_head	siblings;
	struct list_head	children;

#ifdef CONFIG_SCHED_AUTOGROUP
	struct autogroup	*autogroup;
#endif

	struct cfs_bandwidth	cfs_bandwidth;
};

#ifdef CONFIG_FAIR_GROUP_SCHED
#define ROOT_TASK_GROUP_LOAD	NICE_0_LOAD

/*
 * A weight of 0 or 1 can cause arithmetic problems.
 * The weight of a cfs_rq is the sum of the weights of the entities queued
 * on it, so neither the weight of an entity nor the shares value of a task
 * group should be too large.
 * (The default weight is 1024 - so there's no practical limitation from
 * this.)
 */
#define MIN_SHARES	(1UL << 1)
#define MAX_SHARES	(1UL << 18)
#endif
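
/*
 * Example (illustrative): with the default ROOT_TASK_GROUP_LOAD of 1024, a
 * task group whose shares are set to 2048 is entitled to roughly twice the
 * CPU time of a sibling group left at 1024 when both are fully busy.
 * Values written via cpu.shares are clamped to [MIN_SHARES, MAX_SHARES]
 * and scaled through scale_load() internally.
 */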

typedef int (*tg_visitor)(struct task_group *, void *);

extern int walk_tg_tree_from(struct task_group *from,
			     tg_visitor down, tg_visitor up, void *data);

/*
 * Iterate the full tree, calling @down when first entering a node and @up when
 * leaving it for the final time.
 *
 * Caller must hold rcu_lock or sufficient equivalent.
 */
static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
{
	return walk_tg_tree_from(&root_task_group, down, up, data);
}

extern int tg_nop(struct task_group *tg, void *data);

extern void free_fair_sched_group(struct task_group *tg);
extern int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent);
extern void online_fair_sched_group(struct task_group *tg);
extern void unregister_fair_sched_group(struct task_group *tg);
extern void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
			struct sched_entity *se, int cpu,
			struct sched_entity *parent);
extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b);

extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq);

extern void free_rt_sched_group(struct task_group *tg);
extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent);
extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
		struct sched_rt_entity *rt_se, int cpu,
		struct sched_rt_entity *parent);
extern int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us);
extern int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us);
extern long sched_group_rt_runtime(struct task_group *tg);
extern long sched_group_rt_period(struct task_group *tg);
extern int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk);

extern struct task_group *sched_create_group(struct task_group *parent);
extern void sched_online_group(struct task_group *tg,
			       struct task_group *parent);
extern void sched_destroy_group(struct task_group *tg);
extern void sched_offline_group(struct task_group *tg);
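
/*
 * Usage sketch for the tree walk above (tg_down and my_data are
 * hypothetical, shown only for illustration): tg_down() runs when a group
 * is first entered, tg_nop() is the provided no-op visitor for the
 * direction we do not care about, and the walk must be under RCU:
 *
 *	rcu_read_lock();
 *	walk_tg_tree(tg_down, tg_nop, &my_data);
 *	rcu_read_unlock();
 */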

extern void sched_move_task(struct task_struct *tsk);

#ifdef CONFIG_FAIR_GROUP_SCHED
extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);

#ifdef CONFIG_SMP
extern void set_task_rq_fair(struct sched_entity *se,
			     struct cfs_rq *prev, struct cfs_rq *next);
#else /* !CONFIG_SMP */
static inline void set_task_rq_fair(struct sched_entity *se,
			     struct cfs_rq *prev, struct cfs_rq *next) { }
#endif /* CONFIG_SMP */
#endif /* CONFIG_FAIR_GROUP_SCHED */

#else /* CONFIG_CGROUP_SCHED */

struct cfs_bandwidth { };

#endif /* CONFIG_CGROUP_SCHED */

/* CFS-related fields in a runqueue */
struct cfs_rq {
	struct load_weight	load;
	unsigned long		runnable_weight;
	unsigned int		nr_running;
	unsigned int		h_nr_running;

	u64			exec_clock;
	u64			min_vruntime;
#ifndef CONFIG_64BIT
	u64			min_vruntime_copy;
#endif

	struct rb_root_cached	tasks_timeline;

	/*
	 * 'curr' points to currently running entity on this cfs_rq.
	 * It is set to NULL otherwise (i.e. when none are currently running).
	 */
	struct sched_entity	*curr;
	struct sched_entity	*next;
	struct sched_entity	*last;
	struct sched_entity	*skip;

#ifdef CONFIG_SCHED_DEBUG
	unsigned int		nr_spread_over;
#endif

#ifdef CONFIG_SMP
	/*
	 * CFS load tracking
	 */
	struct sched_avg	avg;
#ifndef CONFIG_64BIT
	u64			load_last_update_time_copy;
#endif
	struct {
		raw_spinlock_t	lock ____cacheline_aligned;
		int		nr;
		unsigned long	load_avg;
		unsigned long	util_avg;
		unsigned long	runnable_sum;
	} removed;

#ifdef CONFIG_FAIR_GROUP_SCHED
	unsigned long		tg_load_avg_contrib;
	long			propagate;
	long			prop_runnable_sum;

	/*
	 * h_load = weight * f(tg)
	 *
	 * Where f(tg) is the recursive weight fraction assigned to
	 * this group.
	 */
	unsigned long		h_load;
	u64			last_h_load_update;
	struct sched_entity	*h_load_next;
#endif /* CONFIG_FAIR_GROUP_SCHED */
#endif /* CONFIG_SMP */

#ifdef CONFIG_FAIR_GROUP_SCHED
	struct rq		*rq;	/* CPU runqueue to which this cfs_rq is attached */

	/*
	 * leaf cfs_rqs are those that hold tasks (lowest schedulable entity in
	 * a hierarchy). Non-leaf lrqs hold other higher schedulable entities
	 * (like users, containers etc.)
	 *
	 * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a CPU.
	 * This list is used during load balance.
	 */
	int			on_list;
	struct list_head	leaf_cfs_rq_list;
	struct task_group	*tg;	/* group that "owns" this runqueue */

#ifdef CONFIG_CFS_BANDWIDTH
	int			runtime_enabled;
	u64			runtime_expires;
	s64			runtime_remaining;

	u64			throttled_clock;
	u64			throttled_clock_task;
	u64			throttled_clock_task_time;
	int			throttled;
	int			throttle_count;
	struct list_head	throttled_list;
#endif /* CONFIG_CFS_BANDWIDTH */
#endif /* CONFIG_FAIR_GROUP_SCHED */
};

static inline int rt_bandwidth_enabled(void)
{
	return sysctl_sched_rt_runtime >= 0;
}

/* RT IPI pull logic requires IRQ_WORK */
#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_SMP)
# define HAVE_RT_PUSH_IPI
#endif

/* Real-Time classes' related field in a runqueue: */
struct rt_rq {
	struct rt_prio_array	active;
	unsigned int		rt_nr_running;
	unsigned int		rr_nr_running;
#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
	struct {
		int		curr; /* highest queued rt task prio */
#ifdef CONFIG_SMP
		int		next; /* next highest */
#endif
	} highest_prio;
#endif
#ifdef CONFIG_SMP
	unsigned long		rt_nr_migratory;
	unsigned long		rt_nr_total;
	int			overloaded;
	struct plist_head	pushable_tasks;
#endif /* CONFIG_SMP */
	int			rt_queued;

	int			rt_throttled;
	u64			rt_time;
	u64			rt_runtime;
	/* Nests inside the rq lock: */
	raw_spinlock_t		rt_runtime_lock;

#ifdef CONFIG_RT_GROUP_SCHED
	unsigned long		rt_nr_boosted;

	struct rq		*rq;
	struct task_group	*tg;
#endif
};
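
/*
 * Illustrative throttling check: rt_time accumulates the CPU time consumed
 * by this rt_rq in the current period; once rt_time exceeds rt_runtime
 * (e.g. more than 950ms used out of a 1s period with the default sysctls),
 * rt_throttled is set and the queue is dequeued until the period timer
 * replenishes the budget. Figures are the defaults, shown for illustration.
 */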

/* Deadline class' related fields in a runqueue */
struct dl_rq {
	/* runqueue is an rbtree, ordered by deadline */
	struct rb_root_cached	root;

	unsigned long		dl_nr_running;

#ifdef CONFIG_SMP
	/*
	 * Deadline values of the currently executing and the
	 * earliest ready task on this rq. Caching these facilitates
	 * the decision whether or not a ready but not running task
	 * should migrate somewhere else.
	 */
	struct {
		u64		curr;
		u64		next;
	} earliest_dl;

	unsigned long		dl_nr_migratory;
	int			overloaded;

	/*
	 * Tasks on this rq that can be pushed away. They are kept in
	 * an rb-tree, ordered by tasks' deadlines, with caching
	 * of the leftmost (earliest deadline) element.
	 */
	struct rb_root_cached	pushable_dl_tasks_root;
#else
	struct dl_bw		dl_bw;
#endif
	/*
	 * "Active utilization" for this runqueue: increased when a
	 * task wakes up (becomes TASK_RUNNING) and decreased when a
	 * task blocks
	 */
	u64			running_bw;

	/*
	 * Utilization of the tasks "assigned" to this runqueue (including
	 * the tasks that are in runqueue and the tasks that executed on this
	 * CPU and blocked). Increased when a task moves to this runqueue, and
	 * decreased when the task moves away (migrates, changes scheduling
	 * policy, or terminates).
	 * This is needed to compute the "inactive utilization" for the
	 * runqueue (inactive utilization = this_bw - running_bw).
	 */
	u64			this_bw;
	u64			extra_bw;

	/*
	 * Inverse of the fraction of CPU utilization that can be reclaimed
	 * by the GRUB algorithm.
	 */
	u64			bw_ratio;
};
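
/*
 * Worked example for the utilization accounting above (numbers made up):
 * two SCHED_DEADLINE tasks with runtime/period of 10ms/100ms each are
 * assigned to this CPU, so this_bw corresponds to ~0.2 of the CPU. If one
 * of them is blocked, only the runnable one contributes to running_bw
 * (~0.1); the difference this_bw - running_bw is the "inactive
 * utilization" that GRUB may let other tasks reclaim.
 */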

#ifdef CONFIG_SMP

static inline bool sched_asym_prefer(int a, int b)
{
	return arch_asym_cpu_priority(a) > arch_asym_cpu_priority(b);
}

/*
 * We add the notion of a root-domain which will be used to define per-domain
 * variables. Each exclusive cpuset essentially defines an island domain by
 * fully partitioning the member CPUs from any other cpuset. Whenever a new
 * exclusive cpuset is created, we also create and attach a new root-domain
 * object.
 *
 */
struct root_domain {
	atomic_t		refcount;
	atomic_t		rto_count;
	struct rcu_head		rcu;
	cpumask_var_t		span;
	cpumask_var_t		online;

	/* Indicate more than one runnable task for any CPU */
	bool			overload;

	/*
	 * The bit corresponding to a CPU gets set here if such CPU has more
	 * than one runnable -deadline task (as it is below for RT tasks).
	 */
	cpumask_var_t		dlo_mask;
	atomic_t		dlo_count;
	struct dl_bw		dl_bw;
	struct cpudl		cpudl;

#ifdef HAVE_RT_PUSH_IPI
	/*
	 * For IPI pull requests, loop across the rto_mask.
	 */
	struct irq_work		rto_push_work;
	raw_spinlock_t		rto_lock;
	/* These are only updated and read within rto_lock */
	int			rto_loop;
	int			rto_cpu;
	/* These atomics are updated outside of a lock */
	atomic_t		rto_loop_next;
	atomic_t		rto_loop_start;
#endif
	/*
	 * The "RT overload" flag: it gets set if a CPU has more than
	 * one runnable RT task.
	 */
	cpumask_var_t		rto_mask;
	struct cpupri		cpupri;

	unsigned long		max_cpu_capacity;
};

extern struct root_domain def_root_domain;
extern struct mutex sched_domains_mutex;

extern void init_defrootdomain(void);
extern int sched_init_domains(const struct cpumask *cpu_map);
extern void rq_attach_root(struct rq *rq, struct root_domain *rd);
extern void sched_get_rd(struct root_domain *rd);
extern void sched_put_rd(struct root_domain *rd);

#ifdef HAVE_RT_PUSH_IPI
extern void rto_push_irq_work_func(struct irq_work *work);
#endif
#endif /* CONFIG_SMP */

/*
 * This is the main, per-CPU runqueue data structure.
 *
 * Locking rule: code that needs to lock multiple runqueues (such as the
 * load balancing or the thread migration code) must acquire the locks in
 * ascending &runqueue order.
 */
struct rq {
	/* runqueue lock: */
	raw_spinlock_t		lock;

	/*
	 * nr_running and cpu_load should be in the same cacheline because
	 * remote CPUs use both these fields when doing load calculation.
	 */
	unsigned int		nr_running;
#ifdef CONFIG_NUMA_BALANCING
	unsigned int		nr_numa_running;
	unsigned int		nr_preferred_running;
#endif
#define CPU_LOAD_IDX_MAX 5
	unsigned long		cpu_load[CPU_LOAD_IDX_MAX];
#ifdef CONFIG_NO_HZ_COMMON
#ifdef CONFIG_SMP
	unsigned long		last_load_update_tick;
#endif /* CONFIG_SMP */
	unsigned long		nohz_flags;
#endif /* CONFIG_NO_HZ_COMMON */

	/* capture load from *all* tasks on this CPU: */
	struct load_weight	load;
	unsigned long		nr_load_updates;
	u64			nr_switches;

	struct cfs_rq		cfs;
	struct rt_rq		rt;
	struct dl_rq		dl;

#ifdef CONFIG_FAIR_GROUP_SCHED
	/* list of leaf cfs_rq on this CPU: */
	struct list_head	leaf_cfs_rq_list;
	struct list_head	*tmp_alone_branch;
#endif /* CONFIG_FAIR_GROUP_SCHED */

	/*
	 * This is part of a global counter where only the total sum
	 * over all CPUs matters. A task can increase this counter on
	 * one CPU and if it got migrated afterwards it may decrease
	 * it on another CPU. Always updated under the runqueue lock:
	 */
	unsigned long		nr_uninterruptible;

	struct task_struct	*curr;
	struct task_struct	*idle;
	struct task_struct	*stop;
	unsigned long		next_balance;
	struct mm_struct	*prev_mm;

	unsigned int		clock_update_flags;
	u64			clock;
	u64			clock_task;

	atomic_t		nr_iowait;

#ifdef CONFIG_SMP
	struct root_domain	*rd;
	struct sched_domain	*sd;

	unsigned long		cpu_capacity;
	unsigned long		cpu_capacity_orig;

	struct callback_head	*balance_callback;

	unsigned char		idle_balance;

	/* For active balancing */
	int			active_balance;
	int			push_cpu;
	struct cpu_stop_work	active_balance_work;

	/* CPU of this runqueue: */
	int			cpu;
	int			online;

	struct list_head	cfs_tasks;

	u64			rt_avg;
	u64			age_stamp;
	u64			idle_stamp;
	u64			avg_idle;

	/* This is used to determine avg_idle's max value */
	u64			max_idle_balance_cost;
#endif

#ifdef CONFIG_IRQ_TIME_ACCOUNTING
	u64			prev_irq_time;
#endif
#ifdef CONFIG_PARAVIRT
	u64			prev_steal_time;
#endif
#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
	u64			prev_steal_time_rq;
#endif

	/* calc_load related fields */
	unsigned long		calc_load_update;
	long			calc_load_active;

#ifdef CONFIG_SCHED_HRTICK
#ifdef CONFIG_SMP
	int			hrtick_csd_pending;
	call_single_data_t	hrtick_csd;
#endif
	struct hrtimer		hrtick_timer;
#endif

#ifdef CONFIG_SCHEDSTATS
	/* latency stats */
	struct sched_info	rq_sched_info;
	unsigned long long	rq_cpu_time;
	/* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */

	/* sys_sched_yield() stats */
	unsigned int		yld_count;

	/* schedule() stats */
	unsigned int		sched_count;
	unsigned int		sched_goidle;

	/* try_to_wake_up() stats */
	unsigned int		ttwu_count;
	unsigned int		ttwu_local;
#endif

#ifdef CONFIG_SMP
	struct llist_head	wake_list;
#endif

#ifdef CONFIG_CPU_IDLE
	/* Must be inspected within a rcu lock section */
	struct cpuidle_state	*idle_state;
#endif
};

static inline int cpu_of(struct rq *rq)
{
#ifdef CONFIG_SMP
	return rq->cpu;
#else
	return 0;
#endif
}


#ifdef CONFIG_SCHED_SMT

extern struct static_key_false sched_smt_present;

extern void __update_idle_core(struct rq *rq);

static inline void update_idle_core(struct rq *rq)
{
	if (static_branch_unlikely(&sched_smt_present))
		__update_idle_core(rq);
}

#else
static inline void update_idle_core(struct rq *rq) { }
#endif

DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);

#define cpu_rq(cpu)		(&per_cpu(runqueues, (cpu)))
#define this_rq()		this_cpu_ptr(&runqueues)
#define task_rq(p)		cpu_rq(task_cpu(p))
#define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
#define raw_rq()		raw_cpu_ptr(&runqueues)

static inline u64 __rq_clock_broken(struct rq *rq)
{
	return READ_ONCE(rq->clock);
}

/*
 * rq::clock_update_flags bits
 *
 * %RQCF_REQ_SKIP - will request skipping of clock update on the next
 *   call to __schedule(). This is an optimisation to avoid
 *   neighbouring rq clock updates.
 *
 * %RQCF_ACT_SKIP - is set from inside of __schedule() when skipping is
 *   in effect and calls to update_rq_clock() are being ignored.
 *
 * %RQCF_UPDATED - is a debug flag that indicates whether a call has been
 *   made to update_rq_clock() since the last time rq::lock was pinned.
 *
 * If inside of __schedule(), clock_update_flags will have been
 * shifted left (a left shift is a cheap operation for the fast path
 * to promote %RQCF_REQ_SKIP to %RQCF_ACT_SKIP), so you must use,
 *
 *	if (rq->clock_update_flags >= RQCF_UPDATED)
 *
 * to check if %RQCF_UPDATED is set. It'll never be shifted more than
 * one position though, because the next rq_unpin_lock() will shift it
 * back.
 */
#define RQCF_REQ_SKIP		0x01
#define RQCF_ACT_SKIP		0x02
#define RQCF_UPDATED		0x04

static inline void assert_clock_updated(struct rq *rq)
{
	/*
	 * The only reason for not seeing a clock update since the
	 * last rq_pin_lock() is if we're currently skipping updates.
	 */
	SCHED_WARN_ON(rq->clock_update_flags < RQCF_ACT_SKIP);
}

static inline u64 rq_clock(struct rq *rq)
{
	lockdep_assert_held(&rq->lock);
	assert_clock_updated(rq);

	return rq->clock;
}

static inline u64 rq_clock_task(struct rq *rq)
{
	lockdep_assert_held(&rq->lock);
	assert_clock_updated(rq);

	return rq->clock_task;
}

static inline void rq_clock_skip_update(struct rq *rq, bool skip)
{
	lockdep_assert_held(&rq->lock);
	if (skip)
		rq->clock_update_flags |= RQCF_REQ_SKIP;
	else
		rq->clock_update_flags &= ~RQCF_REQ_SKIP;
}

struct rq_flags {
	unsigned long flags;
	struct pin_cookie cookie;
#ifdef CONFIG_SCHED_DEBUG
	/*
	 * A copy of (rq::clock_update_flags & RQCF_UPDATED) for the
	 * current pin context is stashed here in case it needs to be
	 * restored in rq_repin_lock().
	 */
	unsigned int clock_update_flags;
#endif
};

static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf)
{
	rf->cookie = lockdep_pin_lock(&rq->lock);

#ifdef CONFIG_SCHED_DEBUG
	rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
	rf->clock_update_flags = 0;
#endif
}

static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf)
{
#ifdef CONFIG_SCHED_DEBUG
	if (rq->clock_update_flags > RQCF_ACT_SKIP)
		rf->clock_update_flags = RQCF_UPDATED;
#endif

	lockdep_unpin_lock(&rq->lock, rf->cookie);
}

static inline void rq_repin_lock(struct rq *rq, struct rq_flags *rf)
{
	lockdep_repin_lock(&rq->lock, rf->cookie);

#ifdef CONFIG_SCHED_DEBUG
	/*
	 * Restore the value we stashed in @rf for this pin context.
	 */
	rq->clock_update_flags |= rf->clock_update_flags;
#endif
}
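
/*
 * Typical usage of the pinning helpers above (sketch, assuming the caller
 * already holds rq->lock, e.g. via task_rq_lock()):
 *
 *	struct rq_flags rf;
 *
 *	rq_pin_lock(rq, &rf);
 *	update_rq_clock(rq);
 *	... use rq_clock(rq) / rq_clock_task(rq) ...
 *	rq_unpin_lock(rq, &rf);
 *
 * The pin cookie lets lockdep catch code that drops rq->lock while the
 * scheduler still assumes it is held.
 */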

#ifdef CONFIG_NUMA
enum numa_topology_type {
	NUMA_DIRECT,
	NUMA_GLUELESS_MESH,
	NUMA_BACKPLANE,
};
extern enum numa_topology_type sched_numa_topology_type;
extern int sched_max_numa_distance;
extern bool find_numa_distance(int distance);
#endif

#ifdef CONFIG_NUMA
extern void sched_init_numa(void);
extern void sched_domains_numa_masks_set(unsigned int cpu);
extern void sched_domains_numa_masks_clear(unsigned int cpu);
#else
static inline void sched_init_numa(void) { }
static inline void sched_domains_numa_masks_set(unsigned int cpu) { }
static inline void sched_domains_numa_masks_clear(unsigned int cpu) { }
#endif

#ifdef CONFIG_NUMA_BALANCING
/* The regions in numa_faults array from task_struct */
enum numa_faults_stats {
	NUMA_MEM = 0,
	NUMA_CPU,
	NUMA_MEMBUF,
	NUMA_CPUBUF
};
extern void sched_setnuma(struct task_struct *p, int node);
extern int migrate_task_to(struct task_struct *p, int cpu);
extern int migrate_swap(struct task_struct *, struct task_struct *);
#endif /* CONFIG_NUMA_BALANCING */

#ifdef CONFIG_SMP

static inline void
queue_balance_callback(struct rq *rq,
		       struct callback_head *head,
		       void (*func)(struct rq *rq))
{
	lockdep_assert_held(&rq->lock);

	if (unlikely(head->next))
		return;

	head->func = (void (*)(struct callback_head *))func;
	head->next = rq->balance_callback;
	rq->balance_callback = head;
}

extern void sched_ttwu_pending(void);

#define rcu_dereference_check_sched_domain(p) \
	rcu_dereference_check((p), \
			      lockdep_is_held(&sched_domains_mutex))

/*
 * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
 * See detach_destroy_domains: synchronize_sched for details.
 *
 * The domain tree of any CPU may only be accessed from within
 * preempt-disabled sections.
 */
#define for_each_domain(cpu, __sd) \
	for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); \
			__sd; __sd = __sd->parent)

#define for_each_lower_domain(sd) for (; sd; sd = sd->child)

/**
 * highest_flag_domain - Return highest sched_domain containing flag.
 * @cpu:	The CPU whose highest level of sched domain is to
 *		be returned.
 * @flag:	The flag to check for the highest sched_domain
 *		for the given CPU.
 *
 * Returns the highest sched_domain of a CPU which contains the given flag.
 */
static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
{
	struct sched_domain *sd, *hsd = NULL;

	for_each_domain(cpu, sd) {
		if (!(sd->flags & flag))
			break;
		hsd = sd;
	}

	return hsd;
}
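
/*
 * Example use of the walk above (for illustration): the per-CPU sd_llc
 * pointer is derived by looking up the highest domain that still shares a
 * last-level cache, roughly highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
 * wake-up paths then consult sd_llc/sd_llc_id to decide whether two CPUs
 * share an LLC. The actual setup lives in the topology code.
 */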

static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
{
	struct sched_domain *sd;

	for_each_domain(cpu, sd) {
		if (sd->flags & flag)
			break;
	}

	return sd;
}

DECLARE_PER_CPU(struct sched_domain *, sd_llc);
DECLARE_PER_CPU(int, sd_llc_size);
DECLARE_PER_CPU(int, sd_llc_id);
DECLARE_PER_CPU(struct sched_domain_shared *, sd_llc_shared);
DECLARE_PER_CPU(struct sched_domain *, sd_numa);
DECLARE_PER_CPU(struct sched_domain *, sd_asym);

struct sched_group_capacity {
	atomic_t		ref;
	/*
	 * CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity
	 * for a single CPU.
	 */
	unsigned long		capacity;
	unsigned long		min_capacity;	/* Min per-CPU capacity in group */
	unsigned long		next_update;
	int			imbalance;	/* XXX unrelated to capacity but shared group state */

#ifdef CONFIG_SCHED_DEBUG
	int			id;
#endif

	unsigned long		cpumask[0];	/* Balance mask */
};

struct sched_group {
	struct sched_group	*next;		/* Must be a circular list */
	atomic_t		ref;

	unsigned int		group_weight;
	struct sched_group_capacity *sgc;
	int			asym_prefer_cpu; /* CPU of highest priority in group */

	/*
	 * The CPUs this group covers.
	 *
	 * NOTE: this field is variable length. (Allocated dynamically
(Allocated dynamically 11395e6521eaSLi Zefan * by attaching extra space to the end of the structure, 11405e6521eaSLi Zefan * depending on how many CPUs the kernel has booted up with) 11415e6521eaSLi Zefan */ 11425e6521eaSLi Zefan unsigned long cpumask[0]; 11435e6521eaSLi Zefan }; 11445e6521eaSLi Zefan 1145ae4df9d6SPeter Zijlstra static inline struct cpumask *sched_group_span(struct sched_group *sg) 11465e6521eaSLi Zefan { 11475e6521eaSLi Zefan return to_cpumask(sg->cpumask); 11485e6521eaSLi Zefan } 11495e6521eaSLi Zefan 11505e6521eaSLi Zefan /* 1151e5c14b1fSPeter Zijlstra * See build_balance_mask(). 11525e6521eaSLi Zefan */ 1153e5c14b1fSPeter Zijlstra static inline struct cpumask *group_balance_mask(struct sched_group *sg) 11545e6521eaSLi Zefan { 115563b2ca30SNicolas Pitre return to_cpumask(sg->sgc->cpumask); 11565e6521eaSLi Zefan } 11575e6521eaSLi Zefan 11585e6521eaSLi Zefan /** 115997fb7a0aSIngo Molnar * group_first_cpu - Returns the first CPU in the cpumask of a sched_group. 116097fb7a0aSIngo Molnar * @group: The group whose first CPU is to be returned. 11615e6521eaSLi Zefan */ 11625e6521eaSLi Zefan static inline unsigned int group_first_cpu(struct sched_group *group) 11635e6521eaSLi Zefan { 1164ae4df9d6SPeter Zijlstra return cpumask_first(sched_group_span(group)); 11655e6521eaSLi Zefan } 11665e6521eaSLi Zefan 1167c1174876SPeter Zijlstra extern int group_balance_cpu(struct sched_group *sg); 1168c1174876SPeter Zijlstra 11693866e845SSteven Rostedt (Red Hat) #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) 11703866e845SSteven Rostedt (Red Hat) void register_sched_domain_sysctl(void); 1171bbdacdfeSPeter Zijlstra void dirty_sched_domain_sysctl(int cpu); 11723866e845SSteven Rostedt (Red Hat) void unregister_sched_domain_sysctl(void); 11733866e845SSteven Rostedt (Red Hat) #else 11743866e845SSteven Rostedt (Red Hat) static inline void register_sched_domain_sysctl(void) 11753866e845SSteven Rostedt (Red Hat) { 11763866e845SSteven Rostedt (Red Hat) } 1177bbdacdfeSPeter Zijlstra static inline void dirty_sched_domain_sysctl(int cpu) 1178bbdacdfeSPeter Zijlstra { 1179bbdacdfeSPeter Zijlstra } 11803866e845SSteven Rostedt (Red Hat) static inline void unregister_sched_domain_sysctl(void) 11813866e845SSteven Rostedt (Red Hat) { 11823866e845SSteven Rostedt (Red Hat) } 11833866e845SSteven Rostedt (Red Hat) #endif 11843866e845SSteven Rostedt (Red Hat) 1185e3baac47SPeter Zijlstra #else 1186e3baac47SPeter Zijlstra 1187e3baac47SPeter Zijlstra static inline void sched_ttwu_pending(void) { } 1188e3baac47SPeter Zijlstra 1189518cd623SPeter Zijlstra #endif /* CONFIG_SMP */ 1190391e43daSPeter Zijlstra 1191391e43daSPeter Zijlstra #include "stats.h" 11921051408fSIngo Molnar #include "autogroup.h" 1193391e43daSPeter Zijlstra 1194391e43daSPeter Zijlstra #ifdef CONFIG_CGROUP_SCHED 1195391e43daSPeter Zijlstra 1196391e43daSPeter Zijlstra /* 1197391e43daSPeter Zijlstra * Return the group to which this task belongs. 1198391e43daSPeter Zijlstra * 11998af01f56STejun Heo * We cannot use task_css() and friends because the cgroup subsystem 12008af01f56STejun Heo * changes that value before the cgroup_subsys::attach() method is called, 12018af01f56STejun Heo * therefore we cannot pin it and might observe the wrong value. 12028323f26cSPeter Zijlstra * 12038323f26cSPeter Zijlstra * The same is true for autogroup's p->signal->autogroup->tg, the autogroup 12048323f26cSPeter Zijlstra * core changes this before calling sched_move_task().
12058323f26cSPeter Zijlstra * 12068323f26cSPeter Zijlstra * Instead we use a 'copy' which is updated from sched_move_task() while 12078323f26cSPeter Zijlstra * holding both task_struct::pi_lock and rq::lock. 1208391e43daSPeter Zijlstra */ 1209391e43daSPeter Zijlstra static inline struct task_group *task_group(struct task_struct *p) 1210391e43daSPeter Zijlstra { 12118323f26cSPeter Zijlstra return p->sched_task_group; 1212391e43daSPeter Zijlstra } 1213391e43daSPeter Zijlstra 1214391e43daSPeter Zijlstra /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ 1215391e43daSPeter Zijlstra static inline void set_task_rq(struct task_struct *p, unsigned int cpu) 1216391e43daSPeter Zijlstra { 1217391e43daSPeter Zijlstra #if defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED) 1218391e43daSPeter Zijlstra struct task_group *tg = task_group(p); 1219391e43daSPeter Zijlstra #endif 1220391e43daSPeter Zijlstra 1221391e43daSPeter Zijlstra #ifdef CONFIG_FAIR_GROUP_SCHED 1222ad936d86SByungchul Park set_task_rq_fair(&p->se, p->se.cfs_rq, tg->cfs_rq[cpu]); 1223391e43daSPeter Zijlstra p->se.cfs_rq = tg->cfs_rq[cpu]; 1224391e43daSPeter Zijlstra p->se.parent = tg->se[cpu]; 1225391e43daSPeter Zijlstra #endif 1226391e43daSPeter Zijlstra 1227391e43daSPeter Zijlstra #ifdef CONFIG_RT_GROUP_SCHED 1228391e43daSPeter Zijlstra p->rt.rt_rq = tg->rt_rq[cpu]; 1229391e43daSPeter Zijlstra p->rt.parent = tg->rt_se[cpu]; 1230391e43daSPeter Zijlstra #endif 1231391e43daSPeter Zijlstra } 1232391e43daSPeter Zijlstra 1233391e43daSPeter Zijlstra #else /* CONFIG_CGROUP_SCHED */ 1234391e43daSPeter Zijlstra 1235391e43daSPeter Zijlstra static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } 1236391e43daSPeter Zijlstra static inline struct task_group *task_group(struct task_struct *p) 1237391e43daSPeter Zijlstra { 1238391e43daSPeter Zijlstra return NULL; 1239391e43daSPeter Zijlstra } 1240391e43daSPeter Zijlstra 1241391e43daSPeter Zijlstra #endif /* CONFIG_CGROUP_SCHED */ 1242391e43daSPeter Zijlstra 1243391e43daSPeter Zijlstra static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) 1244391e43daSPeter Zijlstra { 1245391e43daSPeter Zijlstra set_task_rq(p, cpu); 1246391e43daSPeter Zijlstra #ifdef CONFIG_SMP 1247391e43daSPeter Zijlstra /* 1248391e43daSPeter Zijlstra * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be 1249391e43daSPeter Zijlstra * successfully executed on another CPU. We must ensure that updates of 1250391e43daSPeter Zijlstra * per-task data have been completed by this moment.
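 *
 * (Put differently: the smp_wmb() below orders the stores that set up the
 * task's per-CPU scheduling state before the store to ->cpu, which is what
 * a remote task_rq_lock(p, ...) uses to find the runqueue.)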
1251391e43daSPeter Zijlstra */ 1252391e43daSPeter Zijlstra smp_wmb(); 1253c65eacbeSAndy Lutomirski #ifdef CONFIG_THREAD_INFO_IN_TASK 1254c65eacbeSAndy Lutomirski p->cpu = cpu; 1255c65eacbeSAndy Lutomirski #else 1256391e43daSPeter Zijlstra task_thread_info(p)->cpu = cpu; 1257c65eacbeSAndy Lutomirski #endif 1258ac66f547SPeter Zijlstra p->wake_cpu = cpu; 1259391e43daSPeter Zijlstra #endif 1260391e43daSPeter Zijlstra } 1261391e43daSPeter Zijlstra 1262391e43daSPeter Zijlstra /* 1263391e43daSPeter Zijlstra * Tunables that become constants when CONFIG_SCHED_DEBUG is off: 1264391e43daSPeter Zijlstra */ 1265391e43daSPeter Zijlstra #ifdef CONFIG_SCHED_DEBUG 1266c5905afbSIngo Molnar # include <linux/static_key.h> 1267391e43daSPeter Zijlstra # define const_debug __read_mostly 1268391e43daSPeter Zijlstra #else 1269391e43daSPeter Zijlstra # define const_debug const 1270391e43daSPeter Zijlstra #endif 1271391e43daSPeter Zijlstra 1272391e43daSPeter Zijlstra #define SCHED_FEAT(name, enabled) \ 1273391e43daSPeter Zijlstra __SCHED_FEAT_##name , 1274391e43daSPeter Zijlstra 1275391e43daSPeter Zijlstra enum { 1276391e43daSPeter Zijlstra #include "features.h" 1277f8b6d1ccSPeter Zijlstra __SCHED_FEAT_NR, 1278391e43daSPeter Zijlstra }; 1279391e43daSPeter Zijlstra 1280391e43daSPeter Zijlstra #undef SCHED_FEAT 1281391e43daSPeter Zijlstra 1282f8b6d1ccSPeter Zijlstra #if defined(CONFIG_SCHED_DEBUG) && defined(HAVE_JUMP_LABEL) 1283765cc3a4SPatrick Bellasi 1284765cc3a4SPatrick Bellasi /* 1285765cc3a4SPatrick Bellasi * To support run-time toggling of sched features, all the translation units 1286765cc3a4SPatrick Bellasi * (but core.c) reference the sysctl_sched_features defined in core.c. 1287765cc3a4SPatrick Bellasi */ 1288765cc3a4SPatrick Bellasi extern const_debug unsigned int sysctl_sched_features; 1289765cc3a4SPatrick Bellasi 1290f8b6d1ccSPeter Zijlstra #define SCHED_FEAT(name, enabled) \ 1291c5905afbSIngo Molnar static __always_inline bool static_branch_##name(struct static_key *key) \ 1292f8b6d1ccSPeter Zijlstra { \ 12936e76ea8aSJason Baron return static_key_##enabled(key); \ 1294f8b6d1ccSPeter Zijlstra } 1295f8b6d1ccSPeter Zijlstra 1296f8b6d1ccSPeter Zijlstra #include "features.h" 1297f8b6d1ccSPeter Zijlstra #undef SCHED_FEAT 1298f8b6d1ccSPeter Zijlstra 1299c5905afbSIngo Molnar extern struct static_key sched_feat_keys[__SCHED_FEAT_NR]; 1300f8b6d1ccSPeter Zijlstra #define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x])) 1301765cc3a4SPatrick Bellasi 1302f8b6d1ccSPeter Zijlstra #else /* !(SCHED_DEBUG && HAVE_JUMP_LABEL) */ 1303765cc3a4SPatrick Bellasi 1304765cc3a4SPatrick Bellasi /* 1305765cc3a4SPatrick Bellasi * Each translation unit has its own copy of sysctl_sched_features to allow 1306765cc3a4SPatrick Bellasi * constants propagation at compile time and compiler optimization based on 1307765cc3a4SPatrick Bellasi * features default. 
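 *
 * A sketch of what the expansion below produces (feature names are taken
 * from features.h purely as an example): given
 *
 *	SCHED_FEAT(GENTLE_FAIR_SLEEPERS, true)
 *	SCHED_FEAT(START_DEBIT, true)
 *
 * the initializer becomes
 *
 *	sysctl_sched_features =
 *		(1UL << __SCHED_FEAT_GENTLE_FAIR_SLEEPERS) * true |
 *		(1UL << __SCHED_FEAT_START_DEBIT) * true |
 *		0;
 *
 * i.e. a compile-time constant bitmask that the sched_feat() test below can
 * often fold away entirely.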
1308765cc3a4SPatrick Bellasi */ 1309765cc3a4SPatrick Bellasi #define SCHED_FEAT(name, enabled) \ 1310765cc3a4SPatrick Bellasi (1UL << __SCHED_FEAT_##name) * enabled | 1311765cc3a4SPatrick Bellasi static const_debug __maybe_unused unsigned int sysctl_sched_features = 1312765cc3a4SPatrick Bellasi #include "features.h" 1313765cc3a4SPatrick Bellasi 0; 1314765cc3a4SPatrick Bellasi #undef SCHED_FEAT 1315765cc3a4SPatrick Bellasi 1316391e43daSPeter Zijlstra #define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x)) 1317765cc3a4SPatrick Bellasi 1318f8b6d1ccSPeter Zijlstra #endif /* SCHED_DEBUG && HAVE_JUMP_LABEL */ 1319391e43daSPeter Zijlstra 13202a595721SSrikar Dronamraju extern struct static_key_false sched_numa_balancing; 1321cb251765SMel Gorman extern struct static_key_false sched_schedstats; 1322cbee9f88SPeter Zijlstra 1323391e43daSPeter Zijlstra static inline u64 global_rt_period(void) 1324391e43daSPeter Zijlstra { 1325391e43daSPeter Zijlstra return (u64)sysctl_sched_rt_period * NSEC_PER_USEC; 1326391e43daSPeter Zijlstra } 1327391e43daSPeter Zijlstra 1328391e43daSPeter Zijlstra static inline u64 global_rt_runtime(void) 1329391e43daSPeter Zijlstra { 1330391e43daSPeter Zijlstra if (sysctl_sched_rt_runtime < 0) 1331391e43daSPeter Zijlstra return RUNTIME_INF; 1332391e43daSPeter Zijlstra 1333391e43daSPeter Zijlstra return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC; 1334391e43daSPeter Zijlstra } 1335391e43daSPeter Zijlstra 1336391e43daSPeter Zijlstra static inline int task_current(struct rq *rq, struct task_struct *p) 1337391e43daSPeter Zijlstra { 1338391e43daSPeter Zijlstra return rq->curr == p; 1339391e43daSPeter Zijlstra } 1340391e43daSPeter Zijlstra 1341391e43daSPeter Zijlstra static inline int task_running(struct rq *rq, struct task_struct *p) 1342391e43daSPeter Zijlstra { 1343391e43daSPeter Zijlstra #ifdef CONFIG_SMP 1344391e43daSPeter Zijlstra return p->on_cpu; 1345391e43daSPeter Zijlstra #else 1346391e43daSPeter Zijlstra return task_current(rq, p); 1347391e43daSPeter Zijlstra #endif 1348391e43daSPeter Zijlstra } 1349391e43daSPeter Zijlstra 1350da0c1e65SKirill Tkhai static inline int task_on_rq_queued(struct task_struct *p) 1351da0c1e65SKirill Tkhai { 1352da0c1e65SKirill Tkhai return p->on_rq == TASK_ON_RQ_QUEUED; 1353da0c1e65SKirill Tkhai } 1354391e43daSPeter Zijlstra 1355cca26e80SKirill Tkhai static inline int task_on_rq_migrating(struct task_struct *p) 1356cca26e80SKirill Tkhai { 1357cca26e80SKirill Tkhai return p->on_rq == TASK_ON_RQ_MIGRATING; 1358cca26e80SKirill Tkhai } 1359cca26e80SKirill Tkhai 1360391e43daSPeter Zijlstra #ifndef prepare_arch_switch 1361391e43daSPeter Zijlstra # define prepare_arch_switch(next) do { } while (0) 1362391e43daSPeter Zijlstra #endif 136301f23e16SCatalin Marinas #ifndef finish_arch_post_lock_switch 136401f23e16SCatalin Marinas # define finish_arch_post_lock_switch() do { } while (0) 136501f23e16SCatalin Marinas #endif 1366391e43daSPeter Zijlstra 1367b13095f0SLi Zefan /* 1368b13095f0SLi Zefan * wake flags 1369b13095f0SLi Zefan */ 137097fb7a0aSIngo Molnar #define WF_SYNC 0x01 /* Waker goes to sleep after wakeup */ 137197fb7a0aSIngo Molnar #define WF_FORK 0x02 /* Child wakeup after fork */ 137297fb7a0aSIngo Molnar #define WF_MIGRATED 0x4 /* Internal use, task got migrated */ 1373b13095f0SLi Zefan 1374391e43daSPeter Zijlstra /* 1375391e43daSPeter Zijlstra * To aid in avoiding the subversion of "niceness" due to uneven distribution 1376391e43daSPeter Zijlstra * of tasks with abnormal "nice" values across CPUs the contribution that 
1377391e43daSPeter Zijlstra * each task makes to its run queue's load is weighted according to its 1378391e43daSPeter Zijlstra * scheduling class and "nice" value. For SCHED_NORMAL tasks this is just a 1379391e43daSPeter Zijlstra * scaled version of the new time slice allocation that they receive on time 1380391e43daSPeter Zijlstra * slice expiry etc. 1381391e43daSPeter Zijlstra */ 1382391e43daSPeter Zijlstra 1383391e43daSPeter Zijlstra #define WEIGHT_IDLEPRIO 3 1384391e43daSPeter Zijlstra #define WMULT_IDLEPRIO 1431655765 1385391e43daSPeter Zijlstra 1386ed82b8a1SAndi Kleen extern const int sched_prio_to_weight[40]; 1387ed82b8a1SAndi Kleen extern const u32 sched_prio_to_wmult[40]; 1388391e43daSPeter Zijlstra 1389ff77e468SPeter Zijlstra /* 1390ff77e468SPeter Zijlstra * {de,en}queue flags: 1391ff77e468SPeter Zijlstra * 1392ff77e468SPeter Zijlstra * DEQUEUE_SLEEP - task is no longer runnable 1393ff77e468SPeter Zijlstra * ENQUEUE_WAKEUP - task just became runnable 1394ff77e468SPeter Zijlstra * 1395ff77e468SPeter Zijlstra * SAVE/RESTORE - an otherwise spurious dequeue/enqueue, done to ensure tasks 1396ff77e468SPeter Zijlstra * are in a known state which allows modification. Such pairs 1397ff77e468SPeter Zijlstra * should preserve as much state as possible. 1398ff77e468SPeter Zijlstra * 1399ff77e468SPeter Zijlstra * MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location 1400ff77e468SPeter Zijlstra * in the runqueue. 1401ff77e468SPeter Zijlstra * 1402ff77e468SPeter Zijlstra * ENQUEUE_HEAD - place at front of runqueue (tail if not specified) 1403ff77e468SPeter Zijlstra * ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline) 140459efa0baSPeter Zijlstra * ENQUEUE_MIGRATED - the task was migrated during wakeup 1405ff77e468SPeter Zijlstra * 1406ff77e468SPeter Zijlstra */ 1407ff77e468SPeter Zijlstra 1408ff77e468SPeter Zijlstra #define DEQUEUE_SLEEP 0x01 140997fb7a0aSIngo Molnar #define DEQUEUE_SAVE 0x02 /* Matches ENQUEUE_RESTORE */ 141097fb7a0aSIngo Molnar #define DEQUEUE_MOVE 0x04 /* Matches ENQUEUE_MOVE */ 141197fb7a0aSIngo Molnar #define DEQUEUE_NOCLOCK 0x08 /* Matches ENQUEUE_NOCLOCK */ 1412ff77e468SPeter Zijlstra 14131de64443SPeter Zijlstra #define ENQUEUE_WAKEUP 0x01 1414ff77e468SPeter Zijlstra #define ENQUEUE_RESTORE 0x02 1415ff77e468SPeter Zijlstra #define ENQUEUE_MOVE 0x04 14160a67d1eeSPeter Zijlstra #define ENQUEUE_NOCLOCK 0x08 1417ff77e468SPeter Zijlstra 14180a67d1eeSPeter Zijlstra #define ENQUEUE_HEAD 0x10 14190a67d1eeSPeter Zijlstra #define ENQUEUE_REPLENISH 0x20 1420c82ba9faSLi Zefan #ifdef CONFIG_SMP 14210a67d1eeSPeter Zijlstra #define ENQUEUE_MIGRATED 0x40 1422c82ba9faSLi Zefan #else 142359efa0baSPeter Zijlstra #define ENQUEUE_MIGRATED 0x00 1424c82ba9faSLi Zefan #endif 1425c82ba9faSLi Zefan 142637e117c0SPeter Zijlstra #define RETRY_TASK ((void *)-1UL) 142737e117c0SPeter Zijlstra 1428c82ba9faSLi Zefan struct sched_class { 1429c82ba9faSLi Zefan const struct sched_class *next; 1430c82ba9faSLi Zefan 1431c82ba9faSLi Zefan void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags); 1432c82ba9faSLi Zefan void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags); 1433c82ba9faSLi Zefan void (*yield_task) (struct rq *rq); 1434c82ba9faSLi Zefan bool (*yield_to_task)(struct rq *rq, struct task_struct *p, bool preempt); 1435c82ba9faSLi Zefan 1436c82ba9faSLi Zefan void (*check_preempt_curr)(struct rq *rq, struct task_struct *p, int flags); 1437c82ba9faSLi Zefan 1438606dba2eSPeter Zijlstra /* 1439606dba2eSPeter Zijlstra * It is the 
responsibility of the pick_next_task() method that will 1440606dba2eSPeter Zijlstra * return the next task to call put_prev_task() on the @prev task or 1441606dba2eSPeter Zijlstra * something equivalent. 144237e117c0SPeter Zijlstra * 144337e117c0SPeter Zijlstra * May return RETRY_TASK when it finds a higher prio class has runnable 144437e117c0SPeter Zijlstra * tasks. 1445606dba2eSPeter Zijlstra */ 1446606dba2eSPeter Zijlstra struct task_struct * (*pick_next_task)(struct rq *rq, 1447e7904a28SPeter Zijlstra struct task_struct *prev, 1448d8ac8971SMatt Fleming struct rq_flags *rf); 1449c82ba9faSLi Zefan void (*put_prev_task)(struct rq *rq, struct task_struct *p); 1450c82ba9faSLi Zefan 1451c82ba9faSLi Zefan #ifdef CONFIG_SMP 1452ac66f547SPeter Zijlstra int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags); 14535a4fd036Sxiaofeng.yan void (*migrate_task_rq)(struct task_struct *p); 1454c82ba9faSLi Zefan 1455c82ba9faSLi Zefan void (*task_woken)(struct rq *this_rq, struct task_struct *task); 1456c82ba9faSLi Zefan 1457c82ba9faSLi Zefan void (*set_cpus_allowed)(struct task_struct *p, 1458c82ba9faSLi Zefan const struct cpumask *newmask); 1459c82ba9faSLi Zefan 1460c82ba9faSLi Zefan void (*rq_online)(struct rq *rq); 1461c82ba9faSLi Zefan void (*rq_offline)(struct rq *rq); 1462c82ba9faSLi Zefan #endif 1463c82ba9faSLi Zefan 1464c82ba9faSLi Zefan void (*set_curr_task)(struct rq *rq); 1465c82ba9faSLi Zefan void (*task_tick)(struct rq *rq, struct task_struct *p, int queued); 1466c82ba9faSLi Zefan void (*task_fork)(struct task_struct *p); 1467e6c390f2SDario Faggioli void (*task_dead)(struct task_struct *p); 1468c82ba9faSLi Zefan 146967dfa1b7SKirill Tkhai /* 147067dfa1b7SKirill Tkhai * The switched_from() call is allowed to drop rq->lock, therefore we 147167dfa1b7SKirill Tkhai * cannot assume the switched_from/switched_to pair is serialized by 147267dfa1b7SKirill Tkhai * rq->lock. They are however serialized by p->pi_lock.
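 *
 * (For example, a class may drop rq->lock from switched_from() in order to
 * pull tasks from other runqueues, so a later switched_to() cannot rely on
 * rq state observed before the transition; only p->pi_lock spans the whole
 * switch.)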
147367dfa1b7SKirill Tkhai */ 1474c82ba9faSLi Zefan void (*switched_from)(struct rq *this_rq, struct task_struct *task); 1475c82ba9faSLi Zefan void (*switched_to) (struct rq *this_rq, struct task_struct *task); 1476c82ba9faSLi Zefan void (*prio_changed) (struct rq *this_rq, struct task_struct *task, 1477c82ba9faSLi Zefan int oldprio); 1478c82ba9faSLi Zefan 1479c82ba9faSLi Zefan unsigned int (*get_rr_interval)(struct rq *rq, 1480c82ba9faSLi Zefan struct task_struct *task); 1481c82ba9faSLi Zefan 14826e998916SStanislaw Gruszka void (*update_curr)(struct rq *rq); 14836e998916SStanislaw Gruszka 1484ea86cb4bSVincent Guittot #define TASK_SET_GROUP 0 1485ea86cb4bSVincent Guittot #define TASK_MOVE_GROUP 1 1486ea86cb4bSVincent Guittot 1487c82ba9faSLi Zefan #ifdef CONFIG_FAIR_GROUP_SCHED 1488ea86cb4bSVincent Guittot void (*task_change_group)(struct task_struct *p, int type); 1489c82ba9faSLi Zefan #endif 1490c82ba9faSLi Zefan }; 1491391e43daSPeter Zijlstra 14923f1d2a31SPeter Zijlstra static inline void put_prev_task(struct rq *rq, struct task_struct *prev) 14933f1d2a31SPeter Zijlstra { 14943f1d2a31SPeter Zijlstra prev->sched_class->put_prev_task(rq, prev); 14953f1d2a31SPeter Zijlstra } 14963f1d2a31SPeter Zijlstra 1497b2bf6c31SPeter Zijlstra static inline void set_curr_task(struct rq *rq, struct task_struct *curr) 1498b2bf6c31SPeter Zijlstra { 1499b2bf6c31SPeter Zijlstra curr->sched_class->set_curr_task(rq); 1500b2bf6c31SPeter Zijlstra } 1501b2bf6c31SPeter Zijlstra 1502f5832c19SNicolas Pitre #ifdef CONFIG_SMP 1503391e43daSPeter Zijlstra #define sched_class_highest (&stop_sched_class) 1504f5832c19SNicolas Pitre #else 1505f5832c19SNicolas Pitre #define sched_class_highest (&dl_sched_class) 1506f5832c19SNicolas Pitre #endif 1507391e43daSPeter Zijlstra #define for_each_class(class) \ 1508391e43daSPeter Zijlstra for (class = sched_class_highest; class; class = class->next) 1509391e43daSPeter Zijlstra 1510391e43daSPeter Zijlstra extern const struct sched_class stop_sched_class; 1511aab03e05SDario Faggioli extern const struct sched_class dl_sched_class; 1512391e43daSPeter Zijlstra extern const struct sched_class rt_sched_class; 1513391e43daSPeter Zijlstra extern const struct sched_class fair_sched_class; 1514391e43daSPeter Zijlstra extern const struct sched_class idle_sched_class; 1515391e43daSPeter Zijlstra 1516391e43daSPeter Zijlstra 1517391e43daSPeter Zijlstra #ifdef CONFIG_SMP 1518391e43daSPeter Zijlstra 151963b2ca30SNicolas Pitre extern void update_group_capacity(struct sched_domain *sd, int cpu); 1520b719203bSLi Zefan 15217caff66fSDaniel Lezcano extern void trigger_load_balance(struct rq *rq); 1522391e43daSPeter Zijlstra 1523c5b28038SPeter Zijlstra extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask); 1524c5b28038SPeter Zijlstra 1525391e43daSPeter Zijlstra #endif 1526391e43daSPeter Zijlstra 1527442bf3aaSDaniel Lezcano #ifdef CONFIG_CPU_IDLE 1528442bf3aaSDaniel Lezcano static inline void idle_set_state(struct rq *rq, 1529442bf3aaSDaniel Lezcano struct cpuidle_state *idle_state) 1530442bf3aaSDaniel Lezcano { 1531442bf3aaSDaniel Lezcano rq->idle_state = idle_state; 1532442bf3aaSDaniel Lezcano } 1533442bf3aaSDaniel Lezcano 1534442bf3aaSDaniel Lezcano static inline struct cpuidle_state *idle_get_state(struct rq *rq) 1535442bf3aaSDaniel Lezcano { 15369148a3a1SPeter Zijlstra SCHED_WARN_ON(!rcu_read_lock_held()); 153797fb7a0aSIngo Molnar 1538442bf3aaSDaniel Lezcano return rq->idle_state; 1539442bf3aaSDaniel Lezcano } 1540442bf3aaSDaniel Lezcano #else 1541442bf3aaSDaniel 
Lezcano static inline void idle_set_state(struct rq *rq, 1542442bf3aaSDaniel Lezcano struct cpuidle_state *idle_state) 1543442bf3aaSDaniel Lezcano { 1544442bf3aaSDaniel Lezcano } 1545442bf3aaSDaniel Lezcano 1546442bf3aaSDaniel Lezcano static inline struct cpuidle_state *idle_get_state(struct rq *rq) 1547442bf3aaSDaniel Lezcano { 1548442bf3aaSDaniel Lezcano return NULL; 1549442bf3aaSDaniel Lezcano } 1550442bf3aaSDaniel Lezcano #endif 1551442bf3aaSDaniel Lezcano 15528663effbSSteven Rostedt (VMware) extern void schedule_idle(void); 15538663effbSSteven Rostedt (VMware) 1554391e43daSPeter Zijlstra extern void sysrq_sched_debug_show(void); 1555391e43daSPeter Zijlstra extern void sched_init_granularity(void); 1556391e43daSPeter Zijlstra extern void update_max_interval(void); 15571baca4ceSJuri Lelli 15581baca4ceSJuri Lelli extern void init_sched_dl_class(void); 1559391e43daSPeter Zijlstra extern void init_sched_rt_class(void); 1560391e43daSPeter Zijlstra extern void init_sched_fair_class(void); 1561391e43daSPeter Zijlstra 15629059393eSVincent Guittot extern void reweight_task(struct task_struct *p, int prio); 15639059393eSVincent Guittot 15648875125eSKirill Tkhai extern void resched_curr(struct rq *rq); 1565391e43daSPeter Zijlstra extern void resched_cpu(int cpu); 1566391e43daSPeter Zijlstra 1567391e43daSPeter Zijlstra extern struct rt_bandwidth def_rt_bandwidth; 1568391e43daSPeter Zijlstra extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime); 1569391e43daSPeter Zijlstra 1570332ac17eSDario Faggioli extern struct dl_bandwidth def_dl_bandwidth; 1571332ac17eSDario Faggioli extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime); 1572aab03e05SDario Faggioli extern void init_dl_task_timer(struct sched_dl_entity *dl_se); 1573209a0cbdSLuca Abeni extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se); 15744da3abceSLuca Abeni extern void init_dl_rq_bw_ratio(struct dl_rq *dl_rq); 1575aab03e05SDario Faggioli 1576c52f14d3SLuca Abeni #define BW_SHIFT 20 1577c52f14d3SLuca Abeni #define BW_UNIT (1 << BW_SHIFT) 15784da3abceSLuca Abeni #define RATIO_SHIFT 8 1579332ac17eSDario Faggioli unsigned long to_ratio(u64 period, u64 runtime); 1580332ac17eSDario Faggioli 1581540247fbSYuyang Du extern void init_entity_runnable_average(struct sched_entity *se); 15822b8c41daSYuyang Du extern void post_init_entity_util_avg(struct sched_entity *se); 1583a75cdaa9SAlex Shi 158476d92ac3SFrederic Weisbecker #ifdef CONFIG_NO_HZ_FULL 158576d92ac3SFrederic Weisbecker extern bool sched_can_stop_tick(struct rq *rq); 1586d84b3131SFrederic Weisbecker extern int __init sched_tick_offload_init(void); 158776d92ac3SFrederic Weisbecker 158876d92ac3SFrederic Weisbecker /* 158976d92ac3SFrederic Weisbecker * Tick may be needed by tasks in the runqueue depending on their policy and 159076d92ac3SFrederic Weisbecker * requirements. If tick is needed, lets send the target an IPI to kick it out of 159176d92ac3SFrederic Weisbecker * nohz mode if necessary. 
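 *
 * (Example, under the usual nohz_full behaviour: if a CPU that stopped its
 * tick for a single running task gains a second runnable task,
 * sched_can_stop_tick() starts returning false and the TICK_DEP_BIT_SCHED
 * dependency set below brings the tick back.)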
159276d92ac3SFrederic Weisbecker */ 159376d92ac3SFrederic Weisbecker static inline void sched_update_tick_dependency(struct rq *rq) 159476d92ac3SFrederic Weisbecker { 159576d92ac3SFrederic Weisbecker int cpu; 159676d92ac3SFrederic Weisbecker 159776d92ac3SFrederic Weisbecker if (!tick_nohz_full_enabled()) 159876d92ac3SFrederic Weisbecker return; 159976d92ac3SFrederic Weisbecker 160076d92ac3SFrederic Weisbecker cpu = cpu_of(rq); 160176d92ac3SFrederic Weisbecker 160276d92ac3SFrederic Weisbecker if (!tick_nohz_full_cpu(cpu)) 160376d92ac3SFrederic Weisbecker return; 160476d92ac3SFrederic Weisbecker 160576d92ac3SFrederic Weisbecker if (sched_can_stop_tick(rq)) 160676d92ac3SFrederic Weisbecker tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED); 160776d92ac3SFrederic Weisbecker else 160876d92ac3SFrederic Weisbecker tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED); 160976d92ac3SFrederic Weisbecker } 161076d92ac3SFrederic Weisbecker #else 1611d84b3131SFrederic Weisbecker static inline int sched_tick_offload_init(void) { return 0; } 161276d92ac3SFrederic Weisbecker static inline void sched_update_tick_dependency(struct rq *rq) { } 161376d92ac3SFrederic Weisbecker #endif 161476d92ac3SFrederic Weisbecker 161572465447SKirill Tkhai static inline void add_nr_running(struct rq *rq, unsigned count) 1616391e43daSPeter Zijlstra { 161772465447SKirill Tkhai unsigned prev_nr = rq->nr_running; 161872465447SKirill Tkhai 161972465447SKirill Tkhai rq->nr_running = prev_nr + count; 16209f3660c2SFrederic Weisbecker 162172465447SKirill Tkhai if (prev_nr < 2 && rq->nr_running >= 2) { 16224486edd1STim Chen #ifdef CONFIG_SMP 16234486edd1STim Chen if (!rq->rd->overload) 16244486edd1STim Chen rq->rd->overload = true; 16254486edd1STim Chen #endif 162676d92ac3SFrederic Weisbecker } 16274486edd1STim Chen 162876d92ac3SFrederic Weisbecker sched_update_tick_dependency(rq); 16294486edd1STim Chen } 1630391e43daSPeter Zijlstra 163172465447SKirill Tkhai static inline void sub_nr_running(struct rq *rq, unsigned count) 1632391e43daSPeter Zijlstra { 163372465447SKirill Tkhai rq->nr_running -= count; 163476d92ac3SFrederic Weisbecker /* Check if we still need preemption */ 163576d92ac3SFrederic Weisbecker sched_update_tick_dependency(rq); 1636391e43daSPeter Zijlstra } 1637391e43daSPeter Zijlstra 1638391e43daSPeter Zijlstra extern void update_rq_clock(struct rq *rq); 1639391e43daSPeter Zijlstra 1640391e43daSPeter Zijlstra extern void activate_task(struct rq *rq, struct task_struct *p, int flags); 1641391e43daSPeter Zijlstra extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags); 1642391e43daSPeter Zijlstra 1643391e43daSPeter Zijlstra extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags); 1644391e43daSPeter Zijlstra 1645391e43daSPeter Zijlstra extern const_debug unsigned int sysctl_sched_time_avg; 1646391e43daSPeter Zijlstra extern const_debug unsigned int sysctl_sched_nr_migrate; 1647391e43daSPeter Zijlstra extern const_debug unsigned int sysctl_sched_migration_cost; 1648391e43daSPeter Zijlstra 1649391e43daSPeter Zijlstra static inline u64 sched_avg_period(void) 1650391e43daSPeter Zijlstra { 1651391e43daSPeter Zijlstra return (u64)sysctl_sched_time_avg * NSEC_PER_MSEC / 2; 1652391e43daSPeter Zijlstra } 1653391e43daSPeter Zijlstra 1654391e43daSPeter Zijlstra #ifdef CONFIG_SCHED_HRTICK 1655391e43daSPeter Zijlstra 1656391e43daSPeter Zijlstra /* 1657391e43daSPeter Zijlstra * Use hrtick when: 1658391e43daSPeter Zijlstra * - enabled by features 1659391e43daSPeter Zijlstra * - hrtimer is actually high 
res 1660391e43daSPeter Zijlstra */ 1661391e43daSPeter Zijlstra static inline int hrtick_enabled(struct rq *rq) 1662391e43daSPeter Zijlstra { 1663391e43daSPeter Zijlstra if (!sched_feat(HRTICK)) 1664391e43daSPeter Zijlstra return 0; 1665391e43daSPeter Zijlstra if (!cpu_active(cpu_of(rq))) 1666391e43daSPeter Zijlstra return 0; 1667391e43daSPeter Zijlstra return hrtimer_is_hres_active(&rq->hrtick_timer); 1668391e43daSPeter Zijlstra } 1669391e43daSPeter Zijlstra 1670391e43daSPeter Zijlstra void hrtick_start(struct rq *rq, u64 delay); 1671391e43daSPeter Zijlstra 1672b39e66eaSMike Galbraith #else 1673b39e66eaSMike Galbraith 1674b39e66eaSMike Galbraith static inline int hrtick_enabled(struct rq *rq) 1675b39e66eaSMike Galbraith { 1676b39e66eaSMike Galbraith return 0; 1677b39e66eaSMike Galbraith } 1678b39e66eaSMike Galbraith 1679391e43daSPeter Zijlstra #endif /* CONFIG_SCHED_HRTICK */ 1680391e43daSPeter Zijlstra 1681dfbca41fSPeter Zijlstra #ifndef arch_scale_freq_capacity 1682dfbca41fSPeter Zijlstra static __always_inline 16837673c8a4SJuri Lelli unsigned long arch_scale_freq_capacity(int cpu) 1684dfbca41fSPeter Zijlstra { 1685dfbca41fSPeter Zijlstra return SCHED_CAPACITY_SCALE; 1686dfbca41fSPeter Zijlstra } 1687dfbca41fSPeter Zijlstra #endif 1688b5b4860dSVincent Guittot 16897e1a9208SJuri Lelli #ifdef CONFIG_SMP 16907e1a9208SJuri Lelli extern void sched_avg_update(struct rq *rq); 16917e1a9208SJuri Lelli 16928cd5601cSMorten Rasmussen #ifndef arch_scale_cpu_capacity 16938cd5601cSMorten Rasmussen static __always_inline 16948cd5601cSMorten Rasmussen unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu) 16958cd5601cSMorten Rasmussen { 1696e3279a2eSDietmar Eggemann if (sd && (sd->flags & SD_SHARE_CPUCAPACITY) && (sd->span_weight > 1)) 16978cd5601cSMorten Rasmussen return sd->smt_gain / sd->span_weight; 16988cd5601cSMorten Rasmussen 16998cd5601cSMorten Rasmussen return SCHED_CAPACITY_SCALE; 17008cd5601cSMorten Rasmussen } 17018cd5601cSMorten Rasmussen #endif 17028cd5601cSMorten Rasmussen 1703391e43daSPeter Zijlstra static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) 1704391e43daSPeter Zijlstra { 17057673c8a4SJuri Lelli rq->rt_avg += rt_delta * arch_scale_freq_capacity(cpu_of(rq)); 1706391e43daSPeter Zijlstra sched_avg_update(rq); 1707391e43daSPeter Zijlstra } 1708391e43daSPeter Zijlstra #else 17097e1a9208SJuri Lelli #ifndef arch_scale_cpu_capacity 17107e1a9208SJuri Lelli static __always_inline 17117e1a9208SJuri Lelli unsigned long arch_scale_cpu_capacity(void __always_unused *sd, int cpu) 17127e1a9208SJuri Lelli { 17137e1a9208SJuri Lelli return SCHED_CAPACITY_SCALE; 17147e1a9208SJuri Lelli } 17157e1a9208SJuri Lelli #endif 1716391e43daSPeter Zijlstra static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { } 1717391e43daSPeter Zijlstra static inline void sched_avg_update(struct rq *rq) { } 1718391e43daSPeter Zijlstra #endif 1719391e43daSPeter Zijlstra 1720eb580751SPeter Zijlstra struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf) 17213e71a462SPeter Zijlstra __acquires(rq->lock); 17228a8c69c3SPeter Zijlstra 1723eb580751SPeter Zijlstra struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) 17243960c8c0SPeter Zijlstra __acquires(p->pi_lock) 17253e71a462SPeter Zijlstra __acquires(rq->lock); 17263960c8c0SPeter Zijlstra 1727eb580751SPeter Zijlstra static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf) 17283960c8c0SPeter Zijlstra __releases(rq->lock) 17293960c8c0SPeter Zijlstra { 1730d8ac8971SMatt Fleming 
rq_unpin_lock(rq, rf); 17313960c8c0SPeter Zijlstra raw_spin_unlock(&rq->lock); 17323960c8c0SPeter Zijlstra } 17333960c8c0SPeter Zijlstra 17343960c8c0SPeter Zijlstra static inline void 1735eb580751SPeter Zijlstra task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf) 17363960c8c0SPeter Zijlstra __releases(rq->lock) 17373960c8c0SPeter Zijlstra __releases(p->pi_lock) 17383960c8c0SPeter Zijlstra { 1739d8ac8971SMatt Fleming rq_unpin_lock(rq, rf); 17403960c8c0SPeter Zijlstra raw_spin_unlock(&rq->lock); 1741eb580751SPeter Zijlstra raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); 17423960c8c0SPeter Zijlstra } 17433960c8c0SPeter Zijlstra 17448a8c69c3SPeter Zijlstra static inline void 17458a8c69c3SPeter Zijlstra rq_lock_irqsave(struct rq *rq, struct rq_flags *rf) 17468a8c69c3SPeter Zijlstra __acquires(rq->lock) 17478a8c69c3SPeter Zijlstra { 17488a8c69c3SPeter Zijlstra raw_spin_lock_irqsave(&rq->lock, rf->flags); 17498a8c69c3SPeter Zijlstra rq_pin_lock(rq, rf); 17508a8c69c3SPeter Zijlstra } 17518a8c69c3SPeter Zijlstra 17528a8c69c3SPeter Zijlstra static inline void 17538a8c69c3SPeter Zijlstra rq_lock_irq(struct rq *rq, struct rq_flags *rf) 17548a8c69c3SPeter Zijlstra __acquires(rq->lock) 17558a8c69c3SPeter Zijlstra { 17568a8c69c3SPeter Zijlstra raw_spin_lock_irq(&rq->lock); 17578a8c69c3SPeter Zijlstra rq_pin_lock(rq, rf); 17588a8c69c3SPeter Zijlstra } 17598a8c69c3SPeter Zijlstra 17608a8c69c3SPeter Zijlstra static inline void 17618a8c69c3SPeter Zijlstra rq_lock(struct rq *rq, struct rq_flags *rf) 17628a8c69c3SPeter Zijlstra __acquires(rq->lock) 17638a8c69c3SPeter Zijlstra { 17648a8c69c3SPeter Zijlstra raw_spin_lock(&rq->lock); 17658a8c69c3SPeter Zijlstra rq_pin_lock(rq, rf); 17668a8c69c3SPeter Zijlstra } 17678a8c69c3SPeter Zijlstra 17688a8c69c3SPeter Zijlstra static inline void 17698a8c69c3SPeter Zijlstra rq_relock(struct rq *rq, struct rq_flags *rf) 17708a8c69c3SPeter Zijlstra __acquires(rq->lock) 17718a8c69c3SPeter Zijlstra { 17728a8c69c3SPeter Zijlstra raw_spin_lock(&rq->lock); 17738a8c69c3SPeter Zijlstra rq_repin_lock(rq, rf); 17748a8c69c3SPeter Zijlstra } 17758a8c69c3SPeter Zijlstra 17768a8c69c3SPeter Zijlstra static inline void 17778a8c69c3SPeter Zijlstra rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf) 17788a8c69c3SPeter Zijlstra __releases(rq->lock) 17798a8c69c3SPeter Zijlstra { 17808a8c69c3SPeter Zijlstra rq_unpin_lock(rq, rf); 17818a8c69c3SPeter Zijlstra raw_spin_unlock_irqrestore(&rq->lock, rf->flags); 17828a8c69c3SPeter Zijlstra } 17838a8c69c3SPeter Zijlstra 17848a8c69c3SPeter Zijlstra static inline void 17858a8c69c3SPeter Zijlstra rq_unlock_irq(struct rq *rq, struct rq_flags *rf) 17868a8c69c3SPeter Zijlstra __releases(rq->lock) 17878a8c69c3SPeter Zijlstra { 17888a8c69c3SPeter Zijlstra rq_unpin_lock(rq, rf); 17898a8c69c3SPeter Zijlstra raw_spin_unlock_irq(&rq->lock); 17908a8c69c3SPeter Zijlstra } 17918a8c69c3SPeter Zijlstra 17928a8c69c3SPeter Zijlstra static inline void 17938a8c69c3SPeter Zijlstra rq_unlock(struct rq *rq, struct rq_flags *rf) 17948a8c69c3SPeter Zijlstra __releases(rq->lock) 17958a8c69c3SPeter Zijlstra { 17968a8c69c3SPeter Zijlstra rq_unpin_lock(rq, rf); 17978a8c69c3SPeter Zijlstra raw_spin_unlock(&rq->lock); 17988a8c69c3SPeter Zijlstra } 17998a8c69c3SPeter Zijlstra 1800391e43daSPeter Zijlstra #ifdef CONFIG_SMP 1801391e43daSPeter Zijlstra #ifdef CONFIG_PREEMPT 1802391e43daSPeter Zijlstra 1803391e43daSPeter Zijlstra static inline void double_rq_lock(struct rq *rq1, struct rq *rq2); 1804391e43daSPeter Zijlstra 1805391e43daSPeter Zijlstra /* 
1806391e43daSPeter Zijlstra * fair double_lock_balance: Safely acquires both rq->locks in a fair 1807391e43daSPeter Zijlstra * way at the expense of forcing extra atomic operations in all 1808391e43daSPeter Zijlstra * invocations. This assures that the double_lock is acquired using the 1809391e43daSPeter Zijlstra * same underlying policy as the spinlock_t on this architecture, which 1810391e43daSPeter Zijlstra * reduces latency compared to the unfair variant below. However, it 1811391e43daSPeter Zijlstra * also adds more overhead and therefore may reduce throughput. 1812391e43daSPeter Zijlstra */ 1813391e43daSPeter Zijlstra static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest) 1814391e43daSPeter Zijlstra __releases(this_rq->lock) 1815391e43daSPeter Zijlstra __acquires(busiest->lock) 1816391e43daSPeter Zijlstra __acquires(this_rq->lock) 1817391e43daSPeter Zijlstra { 1818391e43daSPeter Zijlstra raw_spin_unlock(&this_rq->lock); 1819391e43daSPeter Zijlstra double_rq_lock(this_rq, busiest); 1820391e43daSPeter Zijlstra 1821391e43daSPeter Zijlstra return 1; 1822391e43daSPeter Zijlstra } 1823391e43daSPeter Zijlstra 1824391e43daSPeter Zijlstra #else 1825391e43daSPeter Zijlstra /* 1826391e43daSPeter Zijlstra * Unfair double_lock_balance: Optimizes throughput at the expense of 1827391e43daSPeter Zijlstra * latency by eliminating extra atomic operations when the locks are 182897fb7a0aSIngo Molnar * already in proper order on entry. This favors lower CPU-ids and will 182997fb7a0aSIngo Molnar * grant the double lock to lower CPUs over higher ids under contention, 1830391e43daSPeter Zijlstra * regardless of entry order into the function. 1831391e43daSPeter Zijlstra */ 1832391e43daSPeter Zijlstra static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest) 1833391e43daSPeter Zijlstra __releases(this_rq->lock) 1834391e43daSPeter Zijlstra __acquires(busiest->lock) 1835391e43daSPeter Zijlstra __acquires(this_rq->lock) 1836391e43daSPeter Zijlstra { 1837391e43daSPeter Zijlstra int ret = 0; 1838391e43daSPeter Zijlstra 1839391e43daSPeter Zijlstra if (unlikely(!raw_spin_trylock(&busiest->lock))) { 1840391e43daSPeter Zijlstra if (busiest < this_rq) { 1841391e43daSPeter Zijlstra raw_spin_unlock(&this_rq->lock); 1842391e43daSPeter Zijlstra raw_spin_lock(&busiest->lock); 1843391e43daSPeter Zijlstra raw_spin_lock_nested(&this_rq->lock, 1844391e43daSPeter Zijlstra SINGLE_DEPTH_NESTING); 1845391e43daSPeter Zijlstra ret = 1; 1846391e43daSPeter Zijlstra } else 1847391e43daSPeter Zijlstra raw_spin_lock_nested(&busiest->lock, 1848391e43daSPeter Zijlstra SINGLE_DEPTH_NESTING); 1849391e43daSPeter Zijlstra } 1850391e43daSPeter Zijlstra return ret; 1851391e43daSPeter Zijlstra } 1852391e43daSPeter Zijlstra 1853391e43daSPeter Zijlstra #endif /* CONFIG_PREEMPT */ 1854391e43daSPeter Zijlstra 1855391e43daSPeter Zijlstra /* 1856391e43daSPeter Zijlstra * double_lock_balance - lock the busiest runqueue, this_rq is locked already. 
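 *
 * A sketch of the usual calling pattern (illustrative only, the helpers named
 * in it are hypothetical): a return value of 1 means this_rq->lock was
 * dropped and re-acquired, so anything derived from this_rq beforehand has to
 * be re-validated:
 *
 *	lockdep_assert_held(&this_rq->lock);
 *
 *	if (double_lock_balance(this_rq, busiest))
 *		revalidate_this_rq_state();
 *	move_some_tasks(busiest, this_rq);
 *	double_unlock_balance(this_rq, busiest);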
1857391e43daSPeter Zijlstra */ 1858391e43daSPeter Zijlstra static inline int double_lock_balance(struct rq *this_rq, struct rq *busiest) 1859391e43daSPeter Zijlstra { 1860391e43daSPeter Zijlstra if (unlikely(!irqs_disabled())) { 186197fb7a0aSIngo Molnar /* printk() doesn't work well under rq->lock */ 1862391e43daSPeter Zijlstra raw_spin_unlock(&this_rq->lock); 1863391e43daSPeter Zijlstra BUG_ON(1); 1864391e43daSPeter Zijlstra } 1865391e43daSPeter Zijlstra 1866391e43daSPeter Zijlstra return _double_lock_balance(this_rq, busiest); 1867391e43daSPeter Zijlstra } 1868391e43daSPeter Zijlstra 1869391e43daSPeter Zijlstra static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest) 1870391e43daSPeter Zijlstra __releases(busiest->lock) 1871391e43daSPeter Zijlstra { 1872391e43daSPeter Zijlstra raw_spin_unlock(&busiest->lock); 1873391e43daSPeter Zijlstra lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_); 1874391e43daSPeter Zijlstra } 1875391e43daSPeter Zijlstra 187674602315SPeter Zijlstra static inline void double_lock(spinlock_t *l1, spinlock_t *l2) 187774602315SPeter Zijlstra { 187874602315SPeter Zijlstra if (l1 > l2) 187974602315SPeter Zijlstra swap(l1, l2); 188074602315SPeter Zijlstra 188174602315SPeter Zijlstra spin_lock(l1); 188274602315SPeter Zijlstra spin_lock_nested(l2, SINGLE_DEPTH_NESTING); 188374602315SPeter Zijlstra } 188474602315SPeter Zijlstra 188560e69eedSMike Galbraith static inline void double_lock_irq(spinlock_t *l1, spinlock_t *l2) 188660e69eedSMike Galbraith { 188760e69eedSMike Galbraith if (l1 > l2) 188860e69eedSMike Galbraith swap(l1, l2); 188960e69eedSMike Galbraith 189060e69eedSMike Galbraith spin_lock_irq(l1); 189160e69eedSMike Galbraith spin_lock_nested(l2, SINGLE_DEPTH_NESTING); 189260e69eedSMike Galbraith } 189360e69eedSMike Galbraith 189474602315SPeter Zijlstra static inline void double_raw_lock(raw_spinlock_t *l1, raw_spinlock_t *l2) 189574602315SPeter Zijlstra { 189674602315SPeter Zijlstra if (l1 > l2) 189774602315SPeter Zijlstra swap(l1, l2); 189874602315SPeter Zijlstra 189974602315SPeter Zijlstra raw_spin_lock(l1); 190074602315SPeter Zijlstra raw_spin_lock_nested(l2, SINGLE_DEPTH_NESTING); 190174602315SPeter Zijlstra } 190274602315SPeter Zijlstra 1903391e43daSPeter Zijlstra /* 1904391e43daSPeter Zijlstra * double_rq_lock - safely lock two runqueues 1905391e43daSPeter Zijlstra * 1906391e43daSPeter Zijlstra * Note this does not disable interrupts like task_rq_lock, 1907391e43daSPeter Zijlstra * you need to do so manually before calling. 
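 *
 * A minimal usage sketch, assuming the caller really does own the interrupt
 * state as required above:
 *
 *	local_irq_disable();
 *	double_rq_lock(rq1, rq2);
 *	...operate on both runqueues...
 *	double_rq_unlock(rq1, rq2);
 *	local_irq_enable();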
1908391e43daSPeter Zijlstra */ 1909391e43daSPeter Zijlstra static inline void double_rq_lock(struct rq *rq1, struct rq *rq2) 1910391e43daSPeter Zijlstra __acquires(rq1->lock) 1911391e43daSPeter Zijlstra __acquires(rq2->lock) 1912391e43daSPeter Zijlstra { 1913391e43daSPeter Zijlstra BUG_ON(!irqs_disabled()); 1914391e43daSPeter Zijlstra if (rq1 == rq2) { 1915391e43daSPeter Zijlstra raw_spin_lock(&rq1->lock); 1916391e43daSPeter Zijlstra __acquire(rq2->lock); /* Fake it out ;) */ 1917391e43daSPeter Zijlstra } else { 1918391e43daSPeter Zijlstra if (rq1 < rq2) { 1919391e43daSPeter Zijlstra raw_spin_lock(&rq1->lock); 1920391e43daSPeter Zijlstra raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING); 1921391e43daSPeter Zijlstra } else { 1922391e43daSPeter Zijlstra raw_spin_lock(&rq2->lock); 1923391e43daSPeter Zijlstra raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING); 1924391e43daSPeter Zijlstra } 1925391e43daSPeter Zijlstra } 1926391e43daSPeter Zijlstra } 1927391e43daSPeter Zijlstra 1928391e43daSPeter Zijlstra /* 1929391e43daSPeter Zijlstra * double_rq_unlock - safely unlock two runqueues 1930391e43daSPeter Zijlstra * 1931391e43daSPeter Zijlstra * Note this does not restore interrupts like task_rq_unlock, 1932391e43daSPeter Zijlstra * you need to do so manually after calling. 1933391e43daSPeter Zijlstra */ 1934391e43daSPeter Zijlstra static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2) 1935391e43daSPeter Zijlstra __releases(rq1->lock) 1936391e43daSPeter Zijlstra __releases(rq2->lock) 1937391e43daSPeter Zijlstra { 1938391e43daSPeter Zijlstra raw_spin_unlock(&rq1->lock); 1939391e43daSPeter Zijlstra if (rq1 != rq2) 1940391e43daSPeter Zijlstra raw_spin_unlock(&rq2->lock); 1941391e43daSPeter Zijlstra else 1942391e43daSPeter Zijlstra __release(rq2->lock); 1943391e43daSPeter Zijlstra } 1944391e43daSPeter Zijlstra 1945f2cb1360SIngo Molnar extern void set_rq_online (struct rq *rq); 1946f2cb1360SIngo Molnar extern void set_rq_offline(struct rq *rq); 1947f2cb1360SIngo Molnar extern bool sched_smp_initialized; 1948f2cb1360SIngo Molnar 1949391e43daSPeter Zijlstra #else /* CONFIG_SMP */ 1950391e43daSPeter Zijlstra 1951391e43daSPeter Zijlstra /* 1952391e43daSPeter Zijlstra * double_rq_lock - safely lock two runqueues 1953391e43daSPeter Zijlstra * 1954391e43daSPeter Zijlstra * Note this does not disable interrupts like task_rq_lock, 1955391e43daSPeter Zijlstra * you need to do so manually before calling. 1956391e43daSPeter Zijlstra */ 1957391e43daSPeter Zijlstra static inline void double_rq_lock(struct rq *rq1, struct rq *rq2) 1958391e43daSPeter Zijlstra __acquires(rq1->lock) 1959391e43daSPeter Zijlstra __acquires(rq2->lock) 1960391e43daSPeter Zijlstra { 1961391e43daSPeter Zijlstra BUG_ON(!irqs_disabled()); 1962391e43daSPeter Zijlstra BUG_ON(rq1 != rq2); 1963391e43daSPeter Zijlstra raw_spin_lock(&rq1->lock); 1964391e43daSPeter Zijlstra __acquire(rq2->lock); /* Fake it out ;) */ 1965391e43daSPeter Zijlstra } 1966391e43daSPeter Zijlstra 1967391e43daSPeter Zijlstra /* 1968391e43daSPeter Zijlstra * double_rq_unlock - safely unlock two runqueues 1969391e43daSPeter Zijlstra * 1970391e43daSPeter Zijlstra * Note this does not restore interrupts like task_rq_unlock, 1971391e43daSPeter Zijlstra * you need to do so manually after calling. 
1972391e43daSPeter Zijlstra */ 1973391e43daSPeter Zijlstra static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2) 1974391e43daSPeter Zijlstra __releases(rq1->lock) 1975391e43daSPeter Zijlstra __releases(rq2->lock) 1976391e43daSPeter Zijlstra { 1977391e43daSPeter Zijlstra BUG_ON(rq1 != rq2); 1978391e43daSPeter Zijlstra raw_spin_unlock(&rq1->lock); 1979391e43daSPeter Zijlstra __release(rq2->lock); 1980391e43daSPeter Zijlstra } 1981391e43daSPeter Zijlstra 1982391e43daSPeter Zijlstra #endif 1983391e43daSPeter Zijlstra 1984391e43daSPeter Zijlstra extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq); 1985391e43daSPeter Zijlstra extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq); 19866b55c965SSrikar Dronamraju 19876b55c965SSrikar Dronamraju #ifdef CONFIG_SCHED_DEBUG 19889469eb01SPeter Zijlstra extern bool sched_debug_enabled; 19899469eb01SPeter Zijlstra 1990391e43daSPeter Zijlstra extern void print_cfs_stats(struct seq_file *m, int cpu); 1991391e43daSPeter Zijlstra extern void print_rt_stats(struct seq_file *m, int cpu); 1992acb32132SWanpeng Li extern void print_dl_stats(struct seq_file *m, int cpu); 19936b55c965SSrikar Dronamraju extern void 19946b55c965SSrikar Dronamraju print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); 1995397f2378SSrikar Dronamraju #ifdef CONFIG_NUMA_BALANCING 1996397f2378SSrikar Dronamraju extern void 1997397f2378SSrikar Dronamraju show_numa_stats(struct task_struct *p, struct seq_file *m); 1998397f2378SSrikar Dronamraju extern void 1999397f2378SSrikar Dronamraju print_numa_stats(struct seq_file *m, int node, unsigned long tsf, 2000397f2378SSrikar Dronamraju unsigned long tpf, unsigned long gsf, unsigned long gpf); 2001397f2378SSrikar Dronamraju #endif /* CONFIG_NUMA_BALANCING */ 2002397f2378SSrikar Dronamraju #endif /* CONFIG_SCHED_DEBUG */ 2003391e43daSPeter Zijlstra 2004391e43daSPeter Zijlstra extern void init_cfs_rq(struct cfs_rq *cfs_rq); 200507c54f7aSAbel Vesa extern void init_rt_rq(struct rt_rq *rt_rq); 200607c54f7aSAbel Vesa extern void init_dl_rq(struct dl_rq *dl_rq); 2007391e43daSPeter Zijlstra 20081ee14e6cSBen Segall extern void cfs_bandwidth_usage_inc(void); 20091ee14e6cSBen Segall extern void cfs_bandwidth_usage_dec(void); 20101c792db7SSuresh Siddha 20113451d024SFrederic Weisbecker #ifdef CONFIG_NO_HZ_COMMON 20121c792db7SSuresh Siddha enum rq_nohz_flag_bits { 20131c792db7SSuresh Siddha NOHZ_TICK_STOPPED, 20141c792db7SSuresh Siddha NOHZ_BALANCE_KICK, 20151c792db7SSuresh Siddha }; 20161c792db7SSuresh Siddha 20171c792db7SSuresh Siddha #define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags) 201820a5c8ccSThomas Gleixner 201920a5c8ccSThomas Gleixner extern void nohz_balance_exit_idle(unsigned int cpu); 202020a5c8ccSThomas Gleixner #else 202120a5c8ccSThomas Gleixner static inline void nohz_balance_exit_idle(unsigned int cpu) { } 20221c792db7SSuresh Siddha #endif 202373fbec60SFrederic Weisbecker 2024daec5798SLuca Abeni 2025daec5798SLuca Abeni #ifdef CONFIG_SMP 2026daec5798SLuca Abeni static inline 2027daec5798SLuca Abeni void __dl_update(struct dl_bw *dl_b, s64 bw) 2028daec5798SLuca Abeni { 2029daec5798SLuca Abeni struct root_domain *rd = container_of(dl_b, struct root_domain, dl_bw); 2030daec5798SLuca Abeni int i; 2031daec5798SLuca Abeni 2032daec5798SLuca Abeni RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(), 2033daec5798SLuca Abeni "sched RCU must be held"); 2034daec5798SLuca Abeni for_each_cpu_and(i, rd->span, cpu_active_mask) { 2035daec5798SLuca Abeni struct rq *rq = cpu_rq(i); 2036daec5798SLuca Abeni 
2037daec5798SLuca Abeni rq->dl.extra_bw += bw; 2038daec5798SLuca Abeni } 2039daec5798SLuca Abeni } 2040daec5798SLuca Abeni #else 2041daec5798SLuca Abeni static inline 2042daec5798SLuca Abeni void __dl_update(struct dl_bw *dl_b, s64 bw) 2043daec5798SLuca Abeni { 2044daec5798SLuca Abeni struct dl_rq *dl = container_of(dl_b, struct dl_rq, dl_bw); 2045daec5798SLuca Abeni 2046daec5798SLuca Abeni dl->extra_bw += bw; 2047daec5798SLuca Abeni } 2048daec5798SLuca Abeni #endif 2049daec5798SLuca Abeni 2050daec5798SLuca Abeni 205173fbec60SFrederic Weisbecker #ifdef CONFIG_IRQ_TIME_ACCOUNTING 205219d23dbfSFrederic Weisbecker struct irqtime { 205325e2d8c1SFrederic Weisbecker u64 total; 2054a499a5a1SFrederic Weisbecker u64 tick_delta; 205519d23dbfSFrederic Weisbecker u64 irq_start_time; 205619d23dbfSFrederic Weisbecker struct u64_stats_sync sync; 205719d23dbfSFrederic Weisbecker }; 205873fbec60SFrederic Weisbecker 205919d23dbfSFrederic Weisbecker DECLARE_PER_CPU(struct irqtime, cpu_irqtime); 206073fbec60SFrederic Weisbecker 206125e2d8c1SFrederic Weisbecker /* 206225e2d8c1SFrederic Weisbecker * Returns the irqtime minus the softirq time computed by ksoftirqd. 206325e2d8c1SFrederic Weisbecker * Otherwise ksoftirqd's sum_exec_runtime would have its own runtime subtracted 206425e2d8c1SFrederic Weisbecker * from it and would never move forward. 206525e2d8c1SFrederic Weisbecker */ 206673fbec60SFrederic Weisbecker static inline u64 irq_time_read(int cpu) 206773fbec60SFrederic Weisbecker { 206819d23dbfSFrederic Weisbecker struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu); 206919d23dbfSFrederic Weisbecker unsigned int seq; 207019d23dbfSFrederic Weisbecker u64 total; 207173fbec60SFrederic Weisbecker 207273fbec60SFrederic Weisbecker do { 207319d23dbfSFrederic Weisbecker seq = __u64_stats_fetch_begin(&irqtime->sync); 207425e2d8c1SFrederic Weisbecker total = irqtime->total; 207519d23dbfSFrederic Weisbecker } while (__u64_stats_fetch_retry(&irqtime->sync, seq)); 207673fbec60SFrederic Weisbecker 207719d23dbfSFrederic Weisbecker return total; 207873fbec60SFrederic Weisbecker } 207973fbec60SFrederic Weisbecker #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ 2080adaf9fcdSRafael J. Wysocki 2081adaf9fcdSRafael J. Wysocki #ifdef CONFIG_CPU_FREQ 2082adaf9fcdSRafael J. Wysocki DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); 2083adaf9fcdSRafael J. Wysocki 2084adaf9fcdSRafael J. Wysocki /** 2085adaf9fcdSRafael J. Wysocki * cpufreq_update_util - Take a note about CPU utilization changes. 208612bde33dSRafael J. Wysocki * @rq: Runqueue to carry out the update for. 208758919e83SRafael J. Wysocki * @flags: Update reason flags. 2088adaf9fcdSRafael J. Wysocki * 208958919e83SRafael J. Wysocki * This function is called by the scheduler on the CPU whose utilization is 209058919e83SRafael J. Wysocki * being updated. 2091adaf9fcdSRafael J. Wysocki * 2092adaf9fcdSRafael J. Wysocki * It can only be called from RCU-sched read-side critical sections. 2093adaf9fcdSRafael J. Wysocki * 2094adaf9fcdSRafael J. Wysocki * The way cpufreq is currently arranged requires it to evaluate the CPU 2095adaf9fcdSRafael J. Wysocki * performance state (frequency/voltage) on a regular basis to prevent it from 2096adaf9fcdSRafael J. Wysocki * being stuck in a completely inadequate performance level for too long.
2097e0367b12SJuri Lelli * That is not guaranteed to happen if the updates are only triggered from CFS 2098e0367b12SJuri Lelli * and DL, though, because they may not be coming in if only RT tasks are 2099e0367b12SJuri Lelli * active all the time (or there are RT tasks only). 2100adaf9fcdSRafael J. Wysocki * 2101e0367b12SJuri Lelli * As a workaround for that issue, this function is called periodically by the 2102e0367b12SJuri Lelli * RT sched class to trigger extra cpufreq updates to prevent it from stalling, 2103adaf9fcdSRafael J. Wysocki * but that really is a band-aid. Going forward it should be replaced with 2104e0367b12SJuri Lelli * solutions targeted more specifically at RT tasks. 2105adaf9fcdSRafael J. Wysocki */ 210612bde33dSRafael J. Wysocki static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) 2107adaf9fcdSRafael J. Wysocki { 210858919e83SRafael J. Wysocki struct update_util_data *data; 210958919e83SRafael J. Wysocki 2110674e7541SViresh Kumar data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data, 2111674e7541SViresh Kumar cpu_of(rq))); 211258919e83SRafael J. Wysocki if (data) 211312bde33dSRafael J. Wysocki data->func(data, rq_clock(rq), flags); 211412bde33dSRafael J. Wysocki } 2115adaf9fcdSRafael J. Wysocki #else 211612bde33dSRafael J. Wysocki static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {} 2117adaf9fcdSRafael J. Wysocki #endif /* CONFIG_CPU_FREQ */ 2118be53f58fSLinus Torvalds 21199bdcb44eSRafael J. Wysocki #ifdef arch_scale_freq_capacity 21209bdcb44eSRafael J. Wysocki # ifndef arch_scale_freq_invariant 212197fb7a0aSIngo Molnar # define arch_scale_freq_invariant() true 21229bdcb44eSRafael J. Wysocki # endif 212397fb7a0aSIngo Molnar #else 212497fb7a0aSIngo Molnar # define arch_scale_freq_invariant() false 21259bdcb44eSRafael J. Wysocki #endif 2126d4edd662SJuri Lelli 2127794a56ebSJuri Lelli #ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL 2128d4edd662SJuri Lelli static inline unsigned long cpu_util_dl(struct rq *rq) 2129d4edd662SJuri Lelli { 2130d4edd662SJuri Lelli return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT; 2131d4edd662SJuri Lelli } 2132d4edd662SJuri Lelli 2133d4edd662SJuri Lelli static inline unsigned long cpu_util_cfs(struct rq *rq) 2134d4edd662SJuri Lelli { 2135d4edd662SJuri Lelli return rq->cfs.avg.util_avg; 2136d4edd662SJuri Lelli } 2137794a56ebSJuri Lelli #endif 2138
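/*
 * Illustrative note on the two helpers above (a sketch of how a
 * schedutil-like consumer might combine them, not a description of the
 * actual governor code): the CFS and DL contributions are independent
 * utilization sums, so a simple aggregation would be
 *
 *	unsigned long util = cpu_util_cfs(rq) + cpu_util_dl(rq);
 *
 *	util = min(util, (unsigned long)SCHED_CAPACITY_SCALE);
 *
 * with the cap reflecting that a single CPU cannot supply more than its
 * full capacity.
 */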